[RS6000] PC-relative TLS support
[official-gcc.git] / gcc / config / rs6000 / rs6000.c
blob32101b77ea3777f95bc9e1fbf42b0131931432d2
1 /* Subroutines used for code generation on IBM RS/6000.
2 Copyright (C) 1991-2019 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #define IN_TARGET_CODE 1
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "gimple.h"
31 #include "cfghooks.h"
32 #include "cfgloop.h"
33 #include "df.h"
34 #include "tm_p.h"
35 #include "stringpool.h"
36 #include "expmed.h"
37 #include "optabs.h"
38 #include "regs.h"
39 #include "ira.h"
40 #include "recog.h"
41 #include "cgraph.h"
42 #include "diagnostic-core.h"
43 #include "insn-attr.h"
44 #include "flags.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "attribs.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "print-tree.h"
51 #include "varasm.h"
52 #include "explow.h"
53 #include "expr.h"
54 #include "output.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "reload.h"
58 #include "sched-int.h"
59 #include "gimplify.h"
60 #include "gimple-fold.h"
61 #include "gimple-iterator.h"
62 #include "gimple-ssa.h"
63 #include "gimple-walk.h"
64 #include "intl.h"
65 #include "params.h"
66 #include "tm-constrs.h"
67 #include "tree-vectorizer.h"
68 #include "target-globals.h"
69 #include "builtins.h"
70 #include "tree-vector-builder.h"
71 #include "context.h"
72 #include "tree-pass.h"
73 #include "except.h"
74 #if TARGET_XCOFF
75 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
76 #endif
77 #include "case-cfn-macros.h"
78 #include "ppc-auxv.h"
79 #include "tree-ssa-propagate.h"
80 #include "tree-vrp.h"
81 #include "tree-ssanames.h"
82 #include "rs6000-internal.h"
84 /* This file should be included last. */
85 #include "target-def.h"
87 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
88 systems will also set long double to be IEEE 128-bit. AIX and Darwin
89 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
90 those systems will not pick up this default. This needs to be after all
91 of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
92 properly defined. */
93 #ifndef TARGET_IEEEQUAD_DEFAULT
94 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
95 #define TARGET_IEEEQUAD_DEFAULT 1
96 #else
97 #define TARGET_IEEEQUAD_DEFAULT 0
98 #endif
99 #endif
101 /* Support targetm.vectorize.builtin_mask_for_load. */
102 GTY(()) tree altivec_builtin_mask_for_load;
104 /* Set to nonzero once AIX common-mode calls have been defined. */
105 static GTY(()) int common_mode_defined;
107 #ifdef USING_ELFOS_H
108 /* Counter for labels which are to be placed in .fixup. */
109 int fixuplabelno = 0;
110 #endif
112 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
113 int dot_symbols;
115 /* Specify the machine mode that pointers have. After generation of rtl, the
116 compiler makes no further distinction between pointers and any other objects
117 of this machine mode. */
118 scalar_int_mode rs6000_pmode;
120 #if TARGET_ELF
121 /* Note whether IEEE 128-bit floating point was passed or returned, either as
122 the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
123 floating point. We changed the default C++ mangling for these types and we
124 may want to generate a weak alias of the old mangling (U10__float128) to the
125 new mangling (u9__ieee128). */
126 bool rs6000_passes_ieee128 = false;
127 #endif
129 /* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
130 name used in current releases (i.e. u9__ieee128). */
131 static bool ieee128_mangling_gcc_8_1;
133 /* Width in bits of a pointer. */
134 unsigned rs6000_pointer_size;
136 #ifdef HAVE_AS_GNU_ATTRIBUTE
137 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
138 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
139 # endif
140 /* Flag whether floating point values have been passed/returned.
141 Note that this doesn't say whether fprs are used, since the
142 Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
143 should be set for soft-float values passed in gprs and ieee128
144 values passed in vsx registers. */
145 bool rs6000_passes_float = false;
146 bool rs6000_passes_long_double = false;
147 /* Flag whether vector values have been passed/returned. */
148 bool rs6000_passes_vector = false;
149 /* Flag whether small (<= 8 byte) structures have been returned. */
150 bool rs6000_returns_struct = false;
151 #endif
153 /* Value is TRUE if register/mode pair is acceptable. */
154 static bool rs6000_hard_regno_mode_ok_p
155 [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
157 /* Maximum number of registers needed for a given register class and mode. */
158 unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
160 /* How many registers are needed for a given register and mode. */
161 unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
163 /* Map register number to register class. */
164 enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
166 static int dbg_cost_ctrl;
168 /* Built in types. */
169 tree rs6000_builtin_types[RS6000_BTI_MAX];
170 tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];
172 /* Flag to say the TOC is initialized */
173 int toc_initialized, need_toc_init;
174 char toc_label_name[10];
176 /* Cached value of rs6000_variable_issue. This is cached in
177 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
178 static short cached_can_issue_more;
180 static GTY(()) section *read_only_data_section;
181 static GTY(()) section *private_data_section;
182 static GTY(()) section *tls_data_section;
183 static GTY(()) section *tls_private_data_section;
184 static GTY(()) section *read_only_private_data_section;
185 static GTY(()) section *sdata2_section;
187 extern GTY(()) section *toc_section;
188 section *toc_section = 0;
190 /* Describe the vector unit used for modes. */
191 enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
192 enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
194 /* Register classes for various constraints that are based on the target
195 switches. */
196 enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
198 /* Describe the alignment of a vector. */
199 int rs6000_vector_align[NUM_MACHINE_MODES];
201 /* Map selected modes to types for builtins. */
202 GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
204 /* What modes to automatically generate reciprocal divide estimate (fre) and
205 reciprocal sqrt (frsqrte) for. */
206 unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
208 /* Masks to determine which reciprocal estimate instructions to generate
209 automatically. */
210 enum rs6000_recip_mask {
211 RECIP_SF_DIV = 0x001, /* Use divide estimate */
212 RECIP_DF_DIV = 0x002,
213 RECIP_V4SF_DIV = 0x004,
214 RECIP_V2DF_DIV = 0x008,
216 RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */
217 RECIP_DF_RSQRT = 0x020,
218 RECIP_V4SF_RSQRT = 0x040,
219 RECIP_V2DF_RSQRT = 0x080,
221 /* Various combination of flags for -mrecip=xxx. */
222 RECIP_NONE = 0,
223 RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
224 | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
225 | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),
227 RECIP_HIGH_PRECISION = RECIP_ALL,
229 /* On low precision machines like the power5, don't enable double precision
230 reciprocal square root estimate, since it isn't accurate enough. */
231 RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
234 /* -mrecip options. */
235 static struct
237 const char *string; /* option name */
238 unsigned int mask; /* mask bits to set */
239 } recip_options[] = {
240 { "all", RECIP_ALL },
241 { "none", RECIP_NONE },
242 { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
243 | RECIP_V2DF_DIV) },
244 { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
245 { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
246 { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
247 | RECIP_V2DF_RSQRT) },
248 { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
249 { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
252 /* On PowerPC, we have a limited number of target clones that we care about
253 which means we can use an array to hold the options, rather than having more
254 elaborate data structures to identify each possible variation. Order the
255 clones from the default to the highest ISA. */
256 enum {
257 CLONE_DEFAULT = 0, /* default clone. */
258 CLONE_ISA_2_05, /* ISA 2.05 (power6). */
259 CLONE_ISA_2_06, /* ISA 2.06 (power7). */
260 CLONE_ISA_2_07, /* ISA 2.07 (power8). */
261 CLONE_ISA_3_00, /* ISA 3.00 (power9). */
262 CLONE_MAX
265 /* Map compiler ISA bits into HWCAP names. */
266 struct clone_map {
267 HOST_WIDE_INT isa_mask; /* rs6000_isa mask */
268 const char *name; /* name to use in __builtin_cpu_supports. */
271 static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
272 { 0, "" }, /* Default options. */
273 { OPTION_MASK_CMPB, "arch_2_05" }, /* ISA 2.05 (power6). */
274 { OPTION_MASK_POPCNTD, "arch_2_06" }, /* ISA 2.06 (power7). */
275 { OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8). */
276 { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.00 (power9). */
280 /* Newer LIBCs explicitly export this symbol to declare that they provide
281 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
282 reference to this symbol whenever we expand a CPU builtin, so that
283 we never link against an old LIBC. */
284 const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";
286 /* True if we have expanded a CPU builtin. */
287 bool cpu_builtin_p = false;
289 /* Pointer to function (in rs6000-c.c) that can define or undefine target
290 macros that have changed. Languages that don't support the preprocessor
291 don't link in rs6000-c.c, so we can't call it directly. */
292 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
294 /* Simplify register classes into simpler classifications.  We assume
295 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
296 check for standard register classes (gpr/floating/altivec/vsx) and
297 floating/vector classes (float/altivec/vsx). */
299 enum rs6000_reg_type {
300 NO_REG_TYPE,
301 PSEUDO_REG_TYPE,
302 GPR_REG_TYPE,
303 VSX_REG_TYPE,
304 ALTIVEC_REG_TYPE,
305 FPR_REG_TYPE,
306 SPR_REG_TYPE,
307 CR_REG_TYPE
310 /* Map register class to register type. */
311 static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
313 /* First/last register type for the 'normal' register types (i.e. general
314 purpose, floating point, altivec, and VSX registers). */
315 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
317 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
320 /* Register classes we care about in secondary reload or go if legitimate
321 address. We only need to worry about GPR, FPR, and Altivec registers here,
322 along an ANY field that is the OR of the 3 register classes. */
324 enum rs6000_reload_reg_type {
325 RELOAD_REG_GPR, /* General purpose registers. */
326 RELOAD_REG_FPR, /* Traditional floating point regs. */
327 RELOAD_REG_VMX, /* Altivec (VMX) registers. */
328 RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
329 N_RELOAD_REG
332 /* For setting up register classes, loop through the 3 register classes mapping
333 into real registers, and skip the ANY class, which is just an OR of the
334 bits. */
335 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
336 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
338 /* Map reload register type to a register in the register class. */
339 struct reload_reg_map_type {
340 const char *name; /* Register class name. */
341 int reg; /* Register in the register class. */
344 static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
345 { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
346 { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
347 { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
348 { "Any", -1 }, /* RELOAD_REG_ANY. */
351 /* Mask bits for each register class, indexed per mode. Historically the
352 compiler has been more restrictive which types can do PRE_MODIFY instead of
353    PRE_INC and PRE_DEC, so keep track of separate bits for these two.  */
354 typedef unsigned char addr_mask_type;
356 #define RELOAD_REG_VALID 0x01 /* Mode valid in register.. */
357 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
358 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
359 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
360 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
361 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
362 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
363 #define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */
365 /* Register type masks based on the type, of valid addressing modes. */
366 struct rs6000_reg_addr {
367 enum insn_code reload_load; /* INSN to reload for loading. */
368 enum insn_code reload_store; /* INSN to reload for storing. */
369 enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
370 enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
371 enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
372 addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
373 bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
376 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
378 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
379 static inline bool
380 mode_supports_pre_incdec_p (machine_mode mode)
382 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
383 != 0);
386 /* Helper function to say whether a mode supports PRE_MODIFY. */
387 static inline bool
388 mode_supports_pre_modify_p (machine_mode mode)
390 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
391 != 0);
394 /* Return true if we have D-form addressing in altivec registers. */
395 static inline bool
396 mode_supports_vmx_dform (machine_mode mode)
398 return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
401 /* Return true if we have D-form addressing in VSX registers. This addressing
402 is more limited than normal d-form addressing in that the offset must be
403 aligned on a 16-byte boundary. */
404 static inline bool
405 mode_supports_dq_form (machine_mode mode)
407 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
408 != 0);
411 /* Given that there exists at least one variable that is set (produced)
412 by OUT_INSN and read (consumed) by IN_INSN, return true iff
413 IN_INSN represents one or more memory store operations and none of
414 the variables set by OUT_INSN is used by IN_INSN as the address of a
415 store operation. If either IN_INSN or OUT_INSN does not represent
416 a "single" RTL SET expression (as loosely defined by the
417 implementation of the single_set function) or a PARALLEL with only
418 SETs, CLOBBERs, and USEs inside, this function returns false.
420 This rs6000-specific version of store_data_bypass_p checks for
421 certain conditions that result in assertion failures (and internal
422 compiler errors) in the generic store_data_bypass_p function and
423 returns false rather than calling store_data_bypass_p if one of the
424 problematic conditions is detected. */
427 rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
/* NOTE(review): this extract is lossy — several original lines (426, 428,
   436, 441, 453-457, 488-492, ...) including braces and some statements are
   absent, so the body below is incomplete as shown; code left untouched.  */
429   rtx out_set, in_set;
430   rtx out_pat, in_pat;
431   rtx out_exp, in_exp;
432   int i, j;
/* Case 1: IN_INSN is a single SET.  If its destination is memory (a store),
   verify OUT_INSN is either a single SET or a PARALLEL consisting only of
   SETs, CLOBBERs and USEs; any other element makes us bail out with false
   rather than risk an assertion failure in the generic checker.  */
434   in_set = single_set (in_insn);
435   if (in_set)
437       if (MEM_P (SET_DEST (in_set)))
439 	  out_set = single_set (out_insn);
440 	  if (!out_set)
442 	      out_pat = PATTERN (out_insn);
443 	      if (GET_CODE (out_pat) == PARALLEL)
445 		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
447 		      out_exp = XVECEXP (out_pat, 0, i);
448 		      if ((GET_CODE (out_exp) == CLOBBER)
449 			  || (GET_CODE (out_exp) == USE))
450 			continue;
451 		      else if (GET_CODE (out_exp) != SET)
452 			return false;
/* Case 2: IN_INSN is not a single SET; it must then be a PARALLEL of
   SETs/CLOBBERs/USEs.  For each SET storing to memory, OUT_INSN is
   subjected to the same single-SET-or-clean-PARALLEL screening.  */
458   else
460       in_pat = PATTERN (in_insn);
461       if (GET_CODE (in_pat) != PARALLEL)
462 	return false;
464       for (i = 0; i < XVECLEN (in_pat, 0); i++)
466 	  in_exp = XVECEXP (in_pat, 0, i);
467 	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
468 	    continue;
469 	  else if (GET_CODE (in_exp) != SET)
470 	    return false;
472 	  if (MEM_P (SET_DEST (in_exp)))
474 	      out_set = single_set (out_insn);
475 	      if (!out_set)
477 		  out_pat = PATTERN (out_insn);
478 		  if (GET_CODE (out_pat) != PARALLEL)
479 		    return false;
480 		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
482 		      out_exp = XVECEXP (out_pat, 0, j);
483 		      if ((GET_CODE (out_exp) == CLOBBER)
484 			  || (GET_CODE (out_exp) == USE))
485 			continue;
486 		      else if (GET_CODE (out_exp) != SET)
487 			return false;
/* All patterns known to trip assertions have been screened out; the
   generic implementation can now answer safely.  */
493   return store_data_bypass_p (out_insn, in_insn);
497 /* Processor costs (relative to an add) */
499 const struct processor_costs *rs6000_cost;
501 /* Instruction size costs on 32bit processors. */
502 static const
503 struct processor_costs size32_cost = {
504 COSTS_N_INSNS (1), /* mulsi */
505 COSTS_N_INSNS (1), /* mulsi_const */
506 COSTS_N_INSNS (1), /* mulsi_const9 */
507 COSTS_N_INSNS (1), /* muldi */
508 COSTS_N_INSNS (1), /* divsi */
509 COSTS_N_INSNS (1), /* divdi */
510 COSTS_N_INSNS (1), /* fp */
511 COSTS_N_INSNS (1), /* dmul */
512 COSTS_N_INSNS (1), /* sdiv */
513 COSTS_N_INSNS (1), /* ddiv */
514 32, /* cache line size */
515 0, /* l1 cache */
516 0, /* l2 cache */
517 0, /* streams */
518 0, /* SF->DF convert */
521 /* Instruction size costs on 64bit processors. */
522 static const
523 struct processor_costs size64_cost = {
524 COSTS_N_INSNS (1), /* mulsi */
525 COSTS_N_INSNS (1), /* mulsi_const */
526 COSTS_N_INSNS (1), /* mulsi_const9 */
527 COSTS_N_INSNS (1), /* muldi */
528 COSTS_N_INSNS (1), /* divsi */
529 COSTS_N_INSNS (1), /* divdi */
530 COSTS_N_INSNS (1), /* fp */
531 COSTS_N_INSNS (1), /* dmul */
532 COSTS_N_INSNS (1), /* sdiv */
533 COSTS_N_INSNS (1), /* ddiv */
534 128, /* cache line size */
535 0, /* l1 cache */
536 0, /* l2 cache */
537 0, /* streams */
538 0, /* SF->DF convert */
541 /* Instruction costs on RS64A processors. */
542 static const
543 struct processor_costs rs64a_cost = {
544 COSTS_N_INSNS (20), /* mulsi */
545 COSTS_N_INSNS (12), /* mulsi_const */
546 COSTS_N_INSNS (8), /* mulsi_const9 */
547 COSTS_N_INSNS (34), /* muldi */
548 COSTS_N_INSNS (65), /* divsi */
549 COSTS_N_INSNS (67), /* divdi */
550 COSTS_N_INSNS (4), /* fp */
551 COSTS_N_INSNS (4), /* dmul */
552 COSTS_N_INSNS (31), /* sdiv */
553 COSTS_N_INSNS (31), /* ddiv */
554 128, /* cache line size */
555 128, /* l1 cache */
556 2048, /* l2 cache */
557 1, /* streams */
558 0, /* SF->DF convert */
561 /* Instruction costs on MPCCORE processors. */
562 static const
563 struct processor_costs mpccore_cost = {
564 COSTS_N_INSNS (2), /* mulsi */
565 COSTS_N_INSNS (2), /* mulsi_const */
566 COSTS_N_INSNS (2), /* mulsi_const9 */
567 COSTS_N_INSNS (2), /* muldi */
568 COSTS_N_INSNS (6), /* divsi */
569 COSTS_N_INSNS (6), /* divdi */
570 COSTS_N_INSNS (4), /* fp */
571 COSTS_N_INSNS (5), /* dmul */
572 COSTS_N_INSNS (10), /* sdiv */
573 COSTS_N_INSNS (17), /* ddiv */
574 32, /* cache line size */
575 4, /* l1 cache */
576 16, /* l2 cache */
577 1, /* streams */
578 0, /* SF->DF convert */
581 /* Instruction costs on PPC403 processors. */
582 static const
583 struct processor_costs ppc403_cost = {
584 COSTS_N_INSNS (4), /* mulsi */
585 COSTS_N_INSNS (4), /* mulsi_const */
586 COSTS_N_INSNS (4), /* mulsi_const9 */
587 COSTS_N_INSNS (4), /* muldi */
588 COSTS_N_INSNS (33), /* divsi */
589 COSTS_N_INSNS (33), /* divdi */
590 COSTS_N_INSNS (11), /* fp */
591 COSTS_N_INSNS (11), /* dmul */
592 COSTS_N_INSNS (11), /* sdiv */
593 COSTS_N_INSNS (11), /* ddiv */
594 32, /* cache line size */
595 4, /* l1 cache */
596 16, /* l2 cache */
597 1, /* streams */
598 0, /* SF->DF convert */
601 /* Instruction costs on PPC405 processors. */
602 static const
603 struct processor_costs ppc405_cost = {
604 COSTS_N_INSNS (5), /* mulsi */
605 COSTS_N_INSNS (4), /* mulsi_const */
606 COSTS_N_INSNS (3), /* mulsi_const9 */
607 COSTS_N_INSNS (5), /* muldi */
608 COSTS_N_INSNS (35), /* divsi */
609 COSTS_N_INSNS (35), /* divdi */
610 COSTS_N_INSNS (11), /* fp */
611 COSTS_N_INSNS (11), /* dmul */
612 COSTS_N_INSNS (11), /* sdiv */
613 COSTS_N_INSNS (11), /* ddiv */
614 32, /* cache line size */
615 16, /* l1 cache */
616 128, /* l2 cache */
617 1, /* streams */
618 0, /* SF->DF convert */
621 /* Instruction costs on PPC440 processors. */
622 static const
623 struct processor_costs ppc440_cost = {
624 COSTS_N_INSNS (3), /* mulsi */
625 COSTS_N_INSNS (2), /* mulsi_const */
626 COSTS_N_INSNS (2), /* mulsi_const9 */
627 COSTS_N_INSNS (3), /* muldi */
628 COSTS_N_INSNS (34), /* divsi */
629 COSTS_N_INSNS (34), /* divdi */
630 COSTS_N_INSNS (5), /* fp */
631 COSTS_N_INSNS (5), /* dmul */
632 COSTS_N_INSNS (19), /* sdiv */
633 COSTS_N_INSNS (33), /* ddiv */
634 32, /* cache line size */
635 32, /* l1 cache */
636 256, /* l2 cache */
637 1, /* streams */
638 0, /* SF->DF convert */
641 /* Instruction costs on PPC476 processors. */
642 static const
643 struct processor_costs ppc476_cost = {
644 COSTS_N_INSNS (4), /* mulsi */
645 COSTS_N_INSNS (4), /* mulsi_const */
646 COSTS_N_INSNS (4), /* mulsi_const9 */
647 COSTS_N_INSNS (4), /* muldi */
648 COSTS_N_INSNS (11), /* divsi */
649 COSTS_N_INSNS (11), /* divdi */
650 COSTS_N_INSNS (6), /* fp */
651 COSTS_N_INSNS (6), /* dmul */
652 COSTS_N_INSNS (19), /* sdiv */
653 COSTS_N_INSNS (33), /* ddiv */
654 32, /* l1 cache line size */
655 32, /* l1 cache */
656 512, /* l2 cache */
657 1, /* streams */
658 0, /* SF->DF convert */
661 /* Instruction costs on PPC601 processors. */
662 static const
663 struct processor_costs ppc601_cost = {
664 COSTS_N_INSNS (5), /* mulsi */
665 COSTS_N_INSNS (5), /* mulsi_const */
666 COSTS_N_INSNS (5), /* mulsi_const9 */
667 COSTS_N_INSNS (5), /* muldi */
668 COSTS_N_INSNS (36), /* divsi */
669 COSTS_N_INSNS (36), /* divdi */
670 COSTS_N_INSNS (4), /* fp */
671 COSTS_N_INSNS (5), /* dmul */
672 COSTS_N_INSNS (17), /* sdiv */
673 COSTS_N_INSNS (31), /* ddiv */
674 32, /* cache line size */
675 32, /* l1 cache */
676 256, /* l2 cache */
677 1, /* streams */
678 0, /* SF->DF convert */
681 /* Instruction costs on PPC603 processors. */
682 static const
683 struct processor_costs ppc603_cost = {
684 COSTS_N_INSNS (5), /* mulsi */
685 COSTS_N_INSNS (3), /* mulsi_const */
686 COSTS_N_INSNS (2), /* mulsi_const9 */
687 COSTS_N_INSNS (5), /* muldi */
688 COSTS_N_INSNS (37), /* divsi */
689 COSTS_N_INSNS (37), /* divdi */
690 COSTS_N_INSNS (3), /* fp */
691 COSTS_N_INSNS (4), /* dmul */
692 COSTS_N_INSNS (18), /* sdiv */
693 COSTS_N_INSNS (33), /* ddiv */
694 32, /* cache line size */
695 8, /* l1 cache */
696 64, /* l2 cache */
697 1, /* streams */
698 0, /* SF->DF convert */
701 /* Instruction costs on PPC604 processors. */
702 static const
703 struct processor_costs ppc604_cost = {
704 COSTS_N_INSNS (4), /* mulsi */
705 COSTS_N_INSNS (4), /* mulsi_const */
706 COSTS_N_INSNS (4), /* mulsi_const9 */
707 COSTS_N_INSNS (4), /* muldi */
708 COSTS_N_INSNS (20), /* divsi */
709 COSTS_N_INSNS (20), /* divdi */
710 COSTS_N_INSNS (3), /* fp */
711 COSTS_N_INSNS (3), /* dmul */
712 COSTS_N_INSNS (18), /* sdiv */
713 COSTS_N_INSNS (32), /* ddiv */
714 32, /* cache line size */
715 16, /* l1 cache */
716 512, /* l2 cache */
717 1, /* streams */
718 0, /* SF->DF convert */
721 /* Instruction costs on PPC604e processors. */
722 static const
723 struct processor_costs ppc604e_cost = {
724 COSTS_N_INSNS (2), /* mulsi */
725 COSTS_N_INSNS (2), /* mulsi_const */
726 COSTS_N_INSNS (2), /* mulsi_const9 */
727 COSTS_N_INSNS (2), /* muldi */
728 COSTS_N_INSNS (20), /* divsi */
729 COSTS_N_INSNS (20), /* divdi */
730 COSTS_N_INSNS (3), /* fp */
731 COSTS_N_INSNS (3), /* dmul */
732 COSTS_N_INSNS (18), /* sdiv */
733 COSTS_N_INSNS (32), /* ddiv */
734 32, /* cache line size */
735 32, /* l1 cache */
736 1024, /* l2 cache */
737 1, /* streams */
738 0, /* SF->DF convert */
741 /* Instruction costs on PPC620 processors. */
742 static const
743 struct processor_costs ppc620_cost = {
744 COSTS_N_INSNS (5), /* mulsi */
745 COSTS_N_INSNS (4), /* mulsi_const */
746 COSTS_N_INSNS (3), /* mulsi_const9 */
747 COSTS_N_INSNS (7), /* muldi */
748 COSTS_N_INSNS (21), /* divsi */
749 COSTS_N_INSNS (37), /* divdi */
750 COSTS_N_INSNS (3), /* fp */
751 COSTS_N_INSNS (3), /* dmul */
752 COSTS_N_INSNS (18), /* sdiv */
753 COSTS_N_INSNS (32), /* ddiv */
754 128, /* cache line size */
755 32, /* l1 cache */
756 1024, /* l2 cache */
757 1, /* streams */
758 0, /* SF->DF convert */
761 /* Instruction costs on PPC630 processors. */
762 static const
763 struct processor_costs ppc630_cost = {
764 COSTS_N_INSNS (5), /* mulsi */
765 COSTS_N_INSNS (4), /* mulsi_const */
766 COSTS_N_INSNS (3), /* mulsi_const9 */
767 COSTS_N_INSNS (7), /* muldi */
768 COSTS_N_INSNS (21), /* divsi */
769 COSTS_N_INSNS (37), /* divdi */
770 COSTS_N_INSNS (3), /* fp */
771 COSTS_N_INSNS (3), /* dmul */
772 COSTS_N_INSNS (17), /* sdiv */
773 COSTS_N_INSNS (21), /* ddiv */
774 128, /* cache line size */
775 64, /* l1 cache */
776 1024, /* l2 cache */
777 1, /* streams */
778 0, /* SF->DF convert */
781 /* Instruction costs on Cell processor. */
782 /* COSTS_N_INSNS (1) ~ one add. */
783 static const
784 struct processor_costs ppccell_cost = {
785 COSTS_N_INSNS (9/2)+2, /* mulsi */
786 COSTS_N_INSNS (6/2), /* mulsi_const */
787 COSTS_N_INSNS (6/2), /* mulsi_const9 */
788 COSTS_N_INSNS (15/2)+2, /* muldi */
789 COSTS_N_INSNS (38/2), /* divsi */
790 COSTS_N_INSNS (70/2), /* divdi */
791 COSTS_N_INSNS (10/2), /* fp */
792 COSTS_N_INSNS (10/2), /* dmul */
793 COSTS_N_INSNS (74/2), /* sdiv */
794 COSTS_N_INSNS (74/2), /* ddiv */
795 128, /* cache line size */
796 32, /* l1 cache */
797 512, /* l2 cache */
798 6, /* streams */
799 0, /* SF->DF convert */
802 /* Instruction costs on PPC750 and PPC7400 processors. */
803 static const
804 struct processor_costs ppc750_cost = {
805 COSTS_N_INSNS (5), /* mulsi */
806 COSTS_N_INSNS (3), /* mulsi_const */
807 COSTS_N_INSNS (2), /* mulsi_const9 */
808 COSTS_N_INSNS (5), /* muldi */
809 COSTS_N_INSNS (17), /* divsi */
810 COSTS_N_INSNS (17), /* divdi */
811 COSTS_N_INSNS (3), /* fp */
812 COSTS_N_INSNS (3), /* dmul */
813 COSTS_N_INSNS (17), /* sdiv */
814 COSTS_N_INSNS (31), /* ddiv */
815 32, /* cache line size */
816 32, /* l1 cache */
817 512, /* l2 cache */
818 1, /* streams */
819 0, /* SF->DF convert */
822 /* Instruction costs on PPC7450 processors. */
823 static const
824 struct processor_costs ppc7450_cost = {
825 COSTS_N_INSNS (4), /* mulsi */
826 COSTS_N_INSNS (3), /* mulsi_const */
827 COSTS_N_INSNS (3), /* mulsi_const9 */
828 COSTS_N_INSNS (4), /* muldi */
829 COSTS_N_INSNS (23), /* divsi */
830 COSTS_N_INSNS (23), /* divdi */
831 COSTS_N_INSNS (5), /* fp */
832 COSTS_N_INSNS (5), /* dmul */
833 COSTS_N_INSNS (21), /* sdiv */
834 COSTS_N_INSNS (35), /* ddiv */
835 32, /* cache line size */
836 32, /* l1 cache */
837 1024, /* l2 cache */
838 1, /* streams */
839 0, /* SF->DF convert */
842 /* Instruction costs on PPC8540 processors. */
843 static const
844 struct processor_costs ppc8540_cost = {
845 COSTS_N_INSNS (4), /* mulsi */
846 COSTS_N_INSNS (4), /* mulsi_const */
847 COSTS_N_INSNS (4), /* mulsi_const9 */
848 COSTS_N_INSNS (4), /* muldi */
849 COSTS_N_INSNS (19), /* divsi */
850 COSTS_N_INSNS (19), /* divdi */
851 COSTS_N_INSNS (4), /* fp */
852 COSTS_N_INSNS (4), /* dmul */
853 COSTS_N_INSNS (29), /* sdiv */
854 COSTS_N_INSNS (29), /* ddiv */
855 32, /* cache line size */
856 32, /* l1 cache */
857 256, /* l2 cache */
858 1, /* prefetch streams /*/
859 0, /* SF->DF convert */
862 /* Instruction costs on E300C2 and E300C3 cores. */
863 static const
864 struct processor_costs ppce300c2c3_cost = {
865 COSTS_N_INSNS (4), /* mulsi */
866 COSTS_N_INSNS (4), /* mulsi_const */
867 COSTS_N_INSNS (4), /* mulsi_const9 */
868 COSTS_N_INSNS (4), /* muldi */
869 COSTS_N_INSNS (19), /* divsi */
870 COSTS_N_INSNS (19), /* divdi */
871 COSTS_N_INSNS (3), /* fp */
872 COSTS_N_INSNS (4), /* dmul */
873 COSTS_N_INSNS (18), /* sdiv */
874 COSTS_N_INSNS (33), /* ddiv */
876 16, /* l1 cache */
877 16, /* l2 cache */
878 1, /* prefetch streams /*/
879 0, /* SF->DF convert */
882 /* Instruction costs on PPCE500MC processors. */
883 static const
884 struct processor_costs ppce500mc_cost = {
885 COSTS_N_INSNS (4), /* mulsi */
886 COSTS_N_INSNS (4), /* mulsi_const */
887 COSTS_N_INSNS (4), /* mulsi_const9 */
888 COSTS_N_INSNS (4), /* muldi */
889 COSTS_N_INSNS (14), /* divsi */
890 COSTS_N_INSNS (14), /* divdi */
891 COSTS_N_INSNS (8), /* fp */
892 COSTS_N_INSNS (10), /* dmul */
893 COSTS_N_INSNS (36), /* sdiv */
894 COSTS_N_INSNS (66), /* ddiv */
895 64, /* cache line size */
896 32, /* l1 cache */
897 128, /* l2 cache */
898 1, /* prefetch streams /*/
899 0, /* SF->DF convert */
902 /* Instruction costs on PPCE500MC64 processors. */
903 static const
904 struct processor_costs ppce500mc64_cost = {
905 COSTS_N_INSNS (4), /* mulsi */
906 COSTS_N_INSNS (4), /* mulsi_const */
907 COSTS_N_INSNS (4), /* mulsi_const9 */
908 COSTS_N_INSNS (4), /* muldi */
909 COSTS_N_INSNS (14), /* divsi */
910 COSTS_N_INSNS (14), /* divdi */
911 COSTS_N_INSNS (4), /* fp */
912 COSTS_N_INSNS (10), /* dmul */
913 COSTS_N_INSNS (36), /* sdiv */
914 COSTS_N_INSNS (66), /* ddiv */
915 64, /* cache line size */
916 32, /* l1 cache */
917 128, /* l2 cache */
918 1, /* prefetch streams /*/
919 0, /* SF->DF convert */
922 /* Instruction costs on PPCE5500 processors. */
923 static const
924 struct processor_costs ppce5500_cost = {
925 COSTS_N_INSNS (5), /* mulsi */
926 COSTS_N_INSNS (5), /* mulsi_const */
927 COSTS_N_INSNS (4), /* mulsi_const9 */
928 COSTS_N_INSNS (5), /* muldi */
929 COSTS_N_INSNS (14), /* divsi */
930 COSTS_N_INSNS (14), /* divdi */
931 COSTS_N_INSNS (7), /* fp */
932 COSTS_N_INSNS (10), /* dmul */
933 COSTS_N_INSNS (36), /* sdiv */
934 COSTS_N_INSNS (66), /* ddiv */
935 64, /* cache line size */
936 32, /* l1 cache */
937 128, /* l2 cache */
938 1, /* prefetch streams /*/
939 0, /* SF->DF convert */
942 /* Instruction costs on PPCE6500 processors. */
943 static const
944 struct processor_costs ppce6500_cost = {
945 COSTS_N_INSNS (5), /* mulsi */
946 COSTS_N_INSNS (5), /* mulsi_const */
947 COSTS_N_INSNS (4), /* mulsi_const9 */
948 COSTS_N_INSNS (5), /* muldi */
949 COSTS_N_INSNS (14), /* divsi */
950 COSTS_N_INSNS (14), /* divdi */
951 COSTS_N_INSNS (7), /* fp */
952 COSTS_N_INSNS (10), /* dmul */
953 COSTS_N_INSNS (36), /* sdiv */
954 COSTS_N_INSNS (66), /* ddiv */
955 64, /* cache line size */
956 32, /* l1 cache */
957 128, /* l2 cache */
958 1, /* prefetch streams /*/
959 0, /* SF->DF convert */
962 /* Instruction costs on AppliedMicro Titan processors.  Latencies are
   in COSTS_N_INSNS units; the trailing entries describe cache geometry,
   prefetch-stream count, and the SF->DF conversion cost.  */
963 static const
964 struct processor_costs titan_cost = {
965 COSTS_N_INSNS (5), /* mulsi */
966 COSTS_N_INSNS (5), /* mulsi_const */
967 COSTS_N_INSNS (5), /* mulsi_const9 */
968 COSTS_N_INSNS (5), /* muldi */
969 COSTS_N_INSNS (18), /* divsi */
970 COSTS_N_INSNS (18), /* divdi */
971 COSTS_N_INSNS (10), /* fp */
972 COSTS_N_INSNS (10), /* dmul */
973 COSTS_N_INSNS (46), /* sdiv */
974 COSTS_N_INSNS (72), /* ddiv */
975 32, /* cache line size */
976 32, /* l1 cache */
977 512, /* l2 cache */
978 1, /* prefetch streams */
979 0, /* SF->DF convert */
982 /* Instruction costs on POWER4 and POWER5 processors.  Latencies are
   in COSTS_N_INSNS units; the trailing entries describe cache geometry,
   prefetch-stream count, and the SF->DF conversion cost.  */
983 static const
984 struct processor_costs power4_cost = {
985 COSTS_N_INSNS (3), /* mulsi */
986 COSTS_N_INSNS (2), /* mulsi_const */
987 COSTS_N_INSNS (2), /* mulsi_const9 */
988 COSTS_N_INSNS (4), /* muldi */
989 COSTS_N_INSNS (18), /* divsi */
990 COSTS_N_INSNS (34), /* divdi */
991 COSTS_N_INSNS (3), /* fp */
992 COSTS_N_INSNS (3), /* dmul */
993 COSTS_N_INSNS (17), /* sdiv */
994 COSTS_N_INSNS (17), /* ddiv */
995 128, /* cache line size */
996 32, /* l1 cache */
997 1024, /* l2 cache */
998 8, /* prefetch streams */
999 0, /* SF->DF convert */
1002 /* Instruction costs on POWER6 processors.  Latencies are in
   COSTS_N_INSNS units; the trailing entries describe cache geometry,
   prefetch-stream count, and the SF->DF conversion cost.  */
1003 static const
1004 struct processor_costs power6_cost = {
1005 COSTS_N_INSNS (8), /* mulsi */
1006 COSTS_N_INSNS (8), /* mulsi_const */
1007 COSTS_N_INSNS (8), /* mulsi_const9 */
1008 COSTS_N_INSNS (8), /* muldi */
1009 COSTS_N_INSNS (22), /* divsi */
1010 COSTS_N_INSNS (28), /* divdi */
1011 COSTS_N_INSNS (3), /* fp */
1012 COSTS_N_INSNS (3), /* dmul */
1013 COSTS_N_INSNS (13), /* sdiv */
1014 COSTS_N_INSNS (16), /* ddiv */
1015 128, /* cache line size */
1016 64, /* l1 cache */
1017 2048, /* l2 cache */
1018 16, /* prefetch streams */
1019 0, /* SF->DF convert */
1022 /* Instruction costs on POWER7 processors.  Latencies are in
   COSTS_N_INSNS units; the trailing entries describe cache geometry,
   prefetch-stream count, and the SF->DF conversion cost (nonzero here,
   unlike the older cost tables above).  */
1023 static const
1024 struct processor_costs power7_cost = {
1025 COSTS_N_INSNS (2), /* mulsi */
1026 COSTS_N_INSNS (2), /* mulsi_const */
1027 COSTS_N_INSNS (2), /* mulsi_const9 */
1028 COSTS_N_INSNS (2), /* muldi */
1029 COSTS_N_INSNS (18), /* divsi */
1030 COSTS_N_INSNS (34), /* divdi */
1031 COSTS_N_INSNS (3), /* fp */
1032 COSTS_N_INSNS (3), /* dmul */
1033 COSTS_N_INSNS (13), /* sdiv */
1034 COSTS_N_INSNS (16), /* ddiv */
1035 128, /* cache line size */
1036 32, /* l1 cache */
1037 256, /* l2 cache */
1038 12, /* prefetch streams */
1039 COSTS_N_INSNS (3), /* SF->DF convert */
1042 /* Instruction costs on POWER8 processors.  Latencies are in
   COSTS_N_INSNS units; the trailing entries describe cache geometry,
   prefetch-stream count, and the SF->DF conversion cost.  */
1043 static const
1044 struct processor_costs power8_cost = {
1045 COSTS_N_INSNS (3), /* mulsi */
1046 COSTS_N_INSNS (3), /* mulsi_const */
1047 COSTS_N_INSNS (3), /* mulsi_const9 */
1048 COSTS_N_INSNS (3), /* muldi */
1049 COSTS_N_INSNS (19), /* divsi */
1050 COSTS_N_INSNS (35), /* divdi */
1051 COSTS_N_INSNS (3), /* fp */
1052 COSTS_N_INSNS (3), /* dmul */
1053 COSTS_N_INSNS (14), /* sdiv */
1054 COSTS_N_INSNS (17), /* ddiv */
1055 128, /* cache line size */
1056 32, /* l1 cache */
1057 256, /* l2 cache */
1058 12, /* prefetch streams */
1059 COSTS_N_INSNS (3), /* SF->DF convert */
1062 /* Instruction costs on POWER9 processors.  Latencies are in
   COSTS_N_INSNS units; the trailing entries describe cache geometry,
   prefetch-stream count, and the SF->DF conversion cost.  */
1063 static const
1064 struct processor_costs power9_cost = {
1065 COSTS_N_INSNS (3), /* mulsi */
1066 COSTS_N_INSNS (3), /* mulsi_const */
1067 COSTS_N_INSNS (3), /* mulsi_const9 */
1068 COSTS_N_INSNS (3), /* muldi */
1069 COSTS_N_INSNS (8), /* divsi */
1070 COSTS_N_INSNS (12), /* divdi */
1071 COSTS_N_INSNS (3), /* fp */
1072 COSTS_N_INSNS (3), /* dmul */
1073 COSTS_N_INSNS (13), /* sdiv */
1074 COSTS_N_INSNS (18), /* ddiv */
1075 128, /* cache line size */
1076 32, /* l1 cache */
1077 512, /* l2 cache */
1078 8, /* prefetch streams */
1079 COSTS_N_INSNS (3), /* SF->DF convert */
1082 /* Instruction costs on POWER A2 processors.  Latencies are in
   COSTS_N_INSNS units; the trailing entries describe cache geometry,
   prefetch-stream count, and the SF->DF conversion cost.  */
1083 static const
1084 struct processor_costs ppca2_cost = {
1085 COSTS_N_INSNS (16), /* mulsi */
1086 COSTS_N_INSNS (16), /* mulsi_const */
1087 COSTS_N_INSNS (16), /* mulsi_const9 */
1088 COSTS_N_INSNS (16), /* muldi */
1089 COSTS_N_INSNS (22), /* divsi */
1090 COSTS_N_INSNS (28), /* divdi */
1091 COSTS_N_INSNS (3), /* fp */
1092 COSTS_N_INSNS (3), /* dmul */
1093 COSTS_N_INSNS (59), /* sdiv */
1094 COSTS_N_INSNS (72), /* ddiv */
/* NOTE(review): every other cost table in this file has a "cache line
   size" entry between "ddiv" and "l1 cache"; it is missing here, so
   this initializer appears to be truncated -- confirm the intended
   value against struct processor_costs before relying on it.  */
1096 16, /* l1 cache */
1097 2048, /* l2 cache */
1098 16, /* prefetch streams */
1099 0, /* SF->DF convert */
1102 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1103 static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
1106 static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
1107 static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
1108 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
1109 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
1110 static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
1111 static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
1112 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
1113 static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
1114 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
1115 bool);
1116 static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
1117 unsigned int);
1118 static bool is_microcoded_insn (rtx_insn *);
1119 static bool is_nonpipeline_insn (rtx_insn *);
1120 static bool is_cracked_insn (rtx_insn *);
1121 static bool is_load_insn (rtx, rtx *);
1122 static bool is_store_insn (rtx, rtx *);
1123 static bool set_to_load_agen (rtx_insn *,rtx_insn *);
1124 static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
1125 static bool insn_must_be_first_in_group (rtx_insn *);
1126 static bool insn_must_be_last_in_group (rtx_insn *);
1127 int easy_vector_constant (rtx, machine_mode);
1128 static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
1129 static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
1130 #if TARGET_MACHO
1131 static tree get_prev_label (tree);
1132 #endif
1133 static bool rs6000_mode_dependent_address (const_rtx);
1134 static bool rs6000_debug_mode_dependent_address (const_rtx);
1135 static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
1136 static enum reg_class rs6000_secondary_reload_class (enum reg_class,
1137 machine_mode, rtx);
1138 static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
1139 machine_mode,
1140 rtx);
1141 static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
1142 static enum reg_class rs6000_debug_preferred_reload_class (rtx,
1143 enum reg_class);
1144 static bool rs6000_debug_secondary_memory_needed (machine_mode,
1145 reg_class_t,
1146 reg_class_t);
1147 static bool rs6000_debug_can_change_mode_class (machine_mode,
1148 machine_mode,
1149 reg_class_t);
1151 static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
1152 = rs6000_mode_dependent_address;
1154 enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
1155 machine_mode, rtx)
1156 = rs6000_secondary_reload_class;
1158 enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
1159 = rs6000_preferred_reload_class;
1161 const int INSN_NOT_AVAILABLE = -1;
1163 static void rs6000_print_isa_options (FILE *, int, const char *,
1164 HOST_WIDE_INT);
1165 static void rs6000_print_builtin_options (FILE *, int, const char *,
1166 HOST_WIDE_INT);
1167 static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);
1169 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
1170 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
1171 enum rs6000_reg_type,
1172 machine_mode,
1173 secondary_reload_info *,
1174 bool);
1175 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
1177 /* Hash table stuff for keeping track of TOC entries.  */
1179 struct GTY((for_user)) toc_hash_struct
1181 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1182 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1183 rtx key;
1184 machine_mode key_mode; /* Mode of KEY.  */
1185 int labelno; /* Label number for this entry.  */
/* Hash/equality callbacks so TOC entries can live in a GC-aware
   hash_table (see toc_hash_table below).  */
1188 struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
1190 static hashval_t hash (toc_hash_struct *);
1191 static bool equal (toc_hash_struct *, toc_hash_struct *);
1194 static GTY (()) hash_table<toc_hasher> *toc_hash_table;
1198 /* Default register names.  Note the GPR/FPR/VR/CR entries are bare
   numbers; the '%'-prefixed spellings live in alt_reg_names below
   (compiled in under TARGET_REGNAMES).  */
1199 char rs6000_reg_names[][8] =
1201 /* GPRs */
1202 "0", "1", "2", "3", "4", "5", "6", "7",
1203 "8", "9", "10", "11", "12", "13", "14", "15",
1204 "16", "17", "18", "19", "20", "21", "22", "23",
1205 "24", "25", "26", "27", "28", "29", "30", "31",
1206 /* FPRs */
1207 "0", "1", "2", "3", "4", "5", "6", "7",
1208 "8", "9", "10", "11", "12", "13", "14", "15",
1209 "16", "17", "18", "19", "20", "21", "22", "23",
1210 "24", "25", "26", "27", "28", "29", "30", "31",
1211 /* VRs */
1212 "0", "1", "2", "3", "4", "5", "6", "7",
1213 "8", "9", "10", "11", "12", "13", "14", "15",
1214 "16", "17", "18", "19", "20", "21", "22", "23",
1215 "24", "25", "26", "27", "28", "29", "30", "31",
1216 /* lr ctr ca ap */
1217 "lr", "ctr", "ca", "ap",
1218 /* cr0..cr7 */
1219 "0", "1", "2", "3", "4", "5", "6", "7",
1220 /* vrsave vscr sfp */
1221 "vrsave", "vscr", "sfp",
1224 #ifdef TARGET_REGNAMES
/* Alternate register names carrying explicit '%' prefixes, compiled in
   only when TARGET_REGNAMES is defined.  Same layout and order as
   rs6000_reg_names above.  */
1225 static const char alt_reg_names[][8] =
1227 /* GPRs */
1228 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1229 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1230 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1231 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1232 /* FPRs */
1233 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1234 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1235 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1236 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1237 /* VRs */
1238 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1239 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1240 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1241 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1242 /* lr ctr ca ap */
1243 "lr", "ctr", "ca", "ap",
1244 /* cr0..cr7 */
1245 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1246 /* vrsave vscr sfp */
1247 "vrsave", "vscr", "sfp",
1249 #endif
1251 /* Table of valid machine attributes. */
1253 static const struct attribute_spec rs6000_attribute_table[] =
1255 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1256 affects_type_identity, handler, exclude } */
1257 { "altivec", 1, 1, false, true, false, false,
1258 rs6000_handle_altivec_attribute, NULL },
1259 { "longcall", 0, 0, false, true, true, false,
1260 rs6000_handle_longcall_attribute, NULL },
1261 { "shortcall", 0, 0, false, true, true, false,
1262 rs6000_handle_longcall_attribute, NULL },
1263 { "ms_struct", 0, 0, false, false, false, false,
1264 rs6000_handle_struct_attribute, NULL },
1265 { "gcc_struct", 0, 0, false, false, false, false,
1266 rs6000_handle_struct_attribute, NULL },
1267 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1268 SUBTARGET_ATTRIBUTE_TABLE,
1269 #endif
1270 { NULL, 0, 0, false, false, false, false, NULL, NULL }
1273 #ifndef TARGET_PROFILE_KERNEL
1274 #define TARGET_PROFILE_KERNEL 0
1275 #endif
1277 /* Initialize the GCC target structure. */
1278 #undef TARGET_ATTRIBUTE_TABLE
1279 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1280 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1281 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1282 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1283 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1285 #undef TARGET_ASM_ALIGNED_DI_OP
1286 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1288 /* Default unaligned ops are only provided for ELF. Find the ops needed
1289 for non-ELF systems. */
1290 #ifndef OBJECT_FORMAT_ELF
1291 #if TARGET_XCOFF
1292 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1293 64-bit targets. */
1294 #undef TARGET_ASM_UNALIGNED_HI_OP
1295 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1296 #undef TARGET_ASM_UNALIGNED_SI_OP
1297 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1298 #undef TARGET_ASM_UNALIGNED_DI_OP
1299 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1300 #else
1301 /* For Darwin. */
1302 #undef TARGET_ASM_UNALIGNED_HI_OP
1303 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1304 #undef TARGET_ASM_UNALIGNED_SI_OP
1305 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1306 #undef TARGET_ASM_UNALIGNED_DI_OP
1307 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1308 #undef TARGET_ASM_ALIGNED_DI_OP
1309 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1310 #endif
1311 #endif
1313 /* This hook deals with fixups for relocatable code and DI-mode objects
1314 in 64-bit code. */
1315 #undef TARGET_ASM_INTEGER
1316 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1318 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1319 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1320 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1321 #endif
1323 #undef TARGET_SET_UP_BY_PROLOGUE
1324 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1326 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1327 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1328 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1329 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1330 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1331 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1332 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1333 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1334 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1335 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1336 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1337 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1339 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1340 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1342 #undef TARGET_INTERNAL_ARG_POINTER
1343 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1345 #undef TARGET_HAVE_TLS
1346 #define TARGET_HAVE_TLS HAVE_AS_TLS
1348 #undef TARGET_CANNOT_FORCE_CONST_MEM
1349 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1351 #undef TARGET_DELEGITIMIZE_ADDRESS
1352 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1354 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1355 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1357 #undef TARGET_LEGITIMATE_COMBINED_INSN
1358 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1360 #undef TARGET_ASM_FUNCTION_PROLOGUE
1361 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1362 #undef TARGET_ASM_FUNCTION_EPILOGUE
1363 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1365 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1366 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1368 #undef TARGET_LEGITIMIZE_ADDRESS
1369 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1371 #undef TARGET_SCHED_VARIABLE_ISSUE
1372 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1374 #undef TARGET_SCHED_ISSUE_RATE
1375 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1376 #undef TARGET_SCHED_ADJUST_COST
1377 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1378 #undef TARGET_SCHED_ADJUST_PRIORITY
1379 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1380 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1381 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1382 #undef TARGET_SCHED_INIT
1383 #define TARGET_SCHED_INIT rs6000_sched_init
1384 #undef TARGET_SCHED_FINISH
1385 #define TARGET_SCHED_FINISH rs6000_sched_finish
1386 #undef TARGET_SCHED_REORDER
1387 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1388 #undef TARGET_SCHED_REORDER2
1389 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1391 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1392 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1394 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1395 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1397 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1398 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1399 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1400 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1401 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1402 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1403 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1404 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1406 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1407 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1409 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1410 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1411 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1412 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1413 rs6000_builtin_support_vector_misalignment
1414 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1415 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1416 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1417 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1418 rs6000_builtin_vectorization_cost
1419 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1420 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1421 rs6000_preferred_simd_mode
1422 #undef TARGET_VECTORIZE_INIT_COST
1423 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1424 #undef TARGET_VECTORIZE_ADD_STMT_COST
1425 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1426 #undef TARGET_VECTORIZE_FINISH_COST
1427 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1428 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1429 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1431 #undef TARGET_LOOP_UNROLL_ADJUST
1432 #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
1434 #undef TARGET_INIT_BUILTINS
1435 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1436 #undef TARGET_BUILTIN_DECL
1437 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1439 #undef TARGET_FOLD_BUILTIN
1440 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1441 #undef TARGET_GIMPLE_FOLD_BUILTIN
1442 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1444 #undef TARGET_EXPAND_BUILTIN
1445 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1447 #undef TARGET_MANGLE_TYPE
1448 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1450 #undef TARGET_INIT_LIBFUNCS
1451 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1453 #if TARGET_MACHO
1454 #undef TARGET_BINDS_LOCAL_P
1455 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1456 #endif
1458 #undef TARGET_MS_BITFIELD_LAYOUT_P
1459 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1461 #undef TARGET_ASM_OUTPUT_MI_THUNK
1462 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1464 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1465 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1467 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1468 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1470 #undef TARGET_REGISTER_MOVE_COST
1471 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1472 #undef TARGET_MEMORY_MOVE_COST
1473 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1474 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1475 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1476 rs6000_ira_change_pseudo_allocno_class
1477 #undef TARGET_CANNOT_COPY_INSN_P
1478 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1479 #undef TARGET_RTX_COSTS
1480 #define TARGET_RTX_COSTS rs6000_rtx_costs
1481 #undef TARGET_ADDRESS_COST
1482 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1483 #undef TARGET_INSN_COST
1484 #define TARGET_INSN_COST rs6000_insn_cost
1486 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1487 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1489 #undef TARGET_PROMOTE_FUNCTION_MODE
1490 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1492 #undef TARGET_RETURN_IN_MEMORY
1493 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1495 #undef TARGET_RETURN_IN_MSB
1496 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1498 #undef TARGET_SETUP_INCOMING_VARARGS
1499 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1501 /* Always strict argument naming on rs6000. */
1502 #undef TARGET_STRICT_ARGUMENT_NAMING
1503 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1504 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1505 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1506 #undef TARGET_SPLIT_COMPLEX_ARG
1507 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1508 #undef TARGET_MUST_PASS_IN_STACK
1509 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1510 #undef TARGET_PASS_BY_REFERENCE
1511 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1512 #undef TARGET_ARG_PARTIAL_BYTES
1513 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1514 #undef TARGET_FUNCTION_ARG_ADVANCE
1515 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1516 #undef TARGET_FUNCTION_ARG
1517 #define TARGET_FUNCTION_ARG rs6000_function_arg
1518 #undef TARGET_FUNCTION_ARG_PADDING
1519 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1520 #undef TARGET_FUNCTION_ARG_BOUNDARY
1521 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1523 #undef TARGET_BUILD_BUILTIN_VA_LIST
1524 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1526 #undef TARGET_EXPAND_BUILTIN_VA_START
1527 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1529 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1530 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1532 #undef TARGET_EH_RETURN_FILTER_MODE
1533 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1535 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1536 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1538 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1539 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1541 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1542 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1544 #undef TARGET_FLOATN_MODE
1545 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1547 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1548 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1550 #undef TARGET_MD_ASM_ADJUST
1551 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1553 #undef TARGET_OPTION_OVERRIDE
1554 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1556 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1557 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1558 rs6000_builtin_vectorized_function
1560 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1561 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1562 rs6000_builtin_md_vectorized_function
1564 #undef TARGET_STACK_PROTECT_GUARD
1565 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1567 #if !TARGET_MACHO
1568 #undef TARGET_STACK_PROTECT_FAIL
1569 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1570 #endif
1572 #ifdef HAVE_AS_TLS
1573 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1574 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1575 #endif
1577 /* Use a 32-bit anchor range. This leads to sequences like:
1579 addis tmp,anchor,high
1580 add dest,tmp,low
1582 where tmp itself acts as an anchor, and can be shared between
1583 accesses to the same 64k page. */
1584 #undef TARGET_MIN_ANCHOR_OFFSET
1585 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1586 #undef TARGET_MAX_ANCHOR_OFFSET
1587 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1588 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1589 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1590 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1591 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1593 #undef TARGET_BUILTIN_RECIPROCAL
1594 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1596 #undef TARGET_SECONDARY_RELOAD
1597 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1598 #undef TARGET_SECONDARY_MEMORY_NEEDED
1599 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1600 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1601 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1603 #undef TARGET_LEGITIMATE_ADDRESS_P
1604 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1606 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1607 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1609 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1610 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1612 #undef TARGET_CAN_ELIMINATE
1613 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1615 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1616 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1618 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1619 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1621 #undef TARGET_TRAMPOLINE_INIT
1622 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1624 #undef TARGET_FUNCTION_VALUE
1625 #define TARGET_FUNCTION_VALUE rs6000_function_value
1627 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1628 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1630 #undef TARGET_OPTION_SAVE
1631 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1633 #undef TARGET_OPTION_RESTORE
1634 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1636 #undef TARGET_OPTION_PRINT
1637 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1639 #undef TARGET_CAN_INLINE_P
1640 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1642 #undef TARGET_SET_CURRENT_FUNCTION
1643 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1645 #undef TARGET_LEGITIMATE_CONSTANT_P
1646 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1648 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1649 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1651 #undef TARGET_CAN_USE_DOLOOP_P
1652 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1654 #undef TARGET_PREDICT_DOLOOP_P
1655 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1657 #undef TARGET_HAVE_COUNT_REG_DECR_P
1658 #define TARGET_HAVE_COUNT_REG_DECR_P true
1660 /* 1000000000 is infinite cost in IVOPTs. */
1661 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1662 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1664 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1665 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1667 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1668 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1670 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1671 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1672 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1673 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1674 #undef TARGET_UNWIND_WORD_MODE
1675 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1677 #undef TARGET_OFFLOAD_OPTIONS
1678 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1680 #undef TARGET_C_MODE_FOR_SUFFIX
1681 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1683 #undef TARGET_INVALID_BINARY_OP
1684 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1686 #undef TARGET_OPTAB_SUPPORTED_P
1687 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1689 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1690 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1692 #undef TARGET_COMPARE_VERSION_PRIORITY
1693 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1695 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1696 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1697 rs6000_generate_version_dispatcher_body
1699 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1700 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1701 rs6000_get_function_versions_dispatcher
1703 #undef TARGET_OPTION_FUNCTION_VERSIONS
1704 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1706 #undef TARGET_HARD_REGNO_NREGS
1707 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1708 #undef TARGET_HARD_REGNO_MODE_OK
1709 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1711 #undef TARGET_MODES_TIEABLE_P
1712 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1714 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1715 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1716 rs6000_hard_regno_call_part_clobbered
1718 #undef TARGET_SLOW_UNALIGNED_ACCESS
1719 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1721 #undef TARGET_CAN_CHANGE_MODE_CLASS
1722 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1724 #undef TARGET_CONSTANT_ALIGNMENT
1725 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1727 #undef TARGET_STARTING_FRAME_OFFSET
1728 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1730 #if TARGET_ELF && RS6000_WEAK
1731 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1732 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1733 #endif
1735 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1736 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1738 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1739 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1742 /* Processor table.  */
1743 struct rs6000_ptt
1745 const char *const name; /* Canonical processor name. */
1746 const enum processor_type processor; /* Processor type enum value. */
1747 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
/* One table entry per RS6000_CPU invocation in rs6000-cpus.def.  */
1750 static struct rs6000_ptt const processor_target_table[] =
1752 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1753 #include "rs6000-cpus.def"
1754 #undef RS6000_CPU
1757 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1758 name is invalid. */
1760 static int
1761 rs6000_cpu_name_lookup (const char *name)
1763 size_t i;
1765 if (name != NULL)
1767 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1768 if (! strcmp (name, processor_target_table[i].name))
1769 return (int)i;
1772 return -1;
1776 /* Return number of consecutive hard regs needed starting at reg REGNO
1777 to hold something of mode MODE.
1778 This is ordinarily the length in words of a value of mode MODE
1779 but can be less for certain modes in special long registers.
1781 POWER and PowerPC GPRs hold 32 bits worth;
1782 PowerPC64 GPRs and FPRs point register holds 64 bits worth. */
1784 static int
1785 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1787 unsigned HOST_WIDE_INT reg_size;
1789 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1790 128-bit floating point that can go in vector registers, which has VSX
1791 memory addressing. */
1792 if (FP_REGNO_P (regno))
1793 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1794 ? UNITS_PER_VSX_WORD
1795 : UNITS_PER_FP_WORD);
1797 else if (ALTIVEC_REGNO_P (regno))
1798 reg_size = UNITS_PER_ALTIVEC_WORD;
1800 else
1801 reg_size = UNITS_PER_WORD;
1803 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1806 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1807 MODE. */
1808 static int
1809 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1811 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
/* A complex value is OK wherever its component scalar mode is OK.  */
1813 if (COMPLEX_MODE_P (mode))
1814 mode = GET_MODE_INNER (mode);
1816 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1817 register combinations, and use PTImode where we need to deal with quad
1818 word memory operations. Don't allow quad words in the argument or frame
1819 pointer registers, just registers 0..31. */
1820 if (mode == PTImode)
1821 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1822 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1823 && ((regno & 1) == 0));
1825 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1826 implementations. Don't allow an item to be split between a FP register
1827 and an Altivec register. Allow TImode in all VSX registers if the user
1828 asked for it. */
1829 if (TARGET_VSX && VSX_REGNO_P (regno)
1830 && (VECTOR_MEM_VSX_P (mode)
1831 || FLOAT128_VECTOR_P (mode)
1832 || reg_addr[mode].scalar_in_vmx_p
1833 || mode == TImode
1834 || (TARGET_VADDUQM && mode == V1TImode)))
1836 if (FP_REGNO_P (regno))
1837 return FP_REGNO_P (last_regno);
1839 if (ALTIVEC_REGNO_P (regno))
/* Values narrower than 16 bytes go in Altivec registers only when the
   mode supports scalar-in-VMX operation.  */
1841 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1842 return 0;
1844 return ALTIVEC_REGNO_P (last_regno);
1848 /* The GPRs can hold any mode, but values bigger than one register
1849 cannot go past R31. */
1850 if (INT_REGNO_P (regno))
1851 return INT_REGNO_P (last_regno);
1853 /* The float registers (except for VSX vector modes) can only hold floating
1854 modes and DImode. */
1855 if (FP_REGNO_P (regno))
1857 if (FLOAT128_VECTOR_P (mode))
1858 return false;
/* TDmode (128-bit decimal float) needs an even/odd FPR pair.  */
1860 if (SCALAR_FLOAT_MODE_P (mode)
1861 && (mode != TDmode || (regno % 2) == 0)
1862 && FP_REGNO_P (last_regno))
1863 return 1;
/* Integer modes in FPRs: an FP-word-sized integer always fits; SImode
   needs power8 vector support and QI/HImode need power9 vector support
   (presumably for the direct-move paths -- confirm).  */
1865 if (GET_MODE_CLASS (mode) == MODE_INT)
1867 if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1868 return 1;
1870 if (TARGET_P8_VECTOR && (mode == SImode))
1871 return 1;
1873 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1874 return 1;
1877 return 0;
1880 /* The CR register can only hold CC modes. */
1881 if (CR_REGNO_P (regno))
1882 return GET_MODE_CLASS (mode) == MODE_CC;
1884 if (CA_REGNO_P (regno))
1885 return mode == Pmode || mode == SImode;
1887 /* AltiVec only in AltiVec registers. */
1888 if (ALTIVEC_REGNO_P (regno))
1889 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1890 || mode == V1TImode);
1892 /* We cannot put non-VSX TImode or PTImode anywhere except general register
1893 and it must be able to fit within the register set. */
1895 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1898 /* Implement TARGET_HARD_REGNO_NREGS. */
1900 static unsigned int
1901 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1903 return rs6000_hard_regno_nregs[mode][regno];
1906 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1908 static bool
1909 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1911 return rs6000_hard_regno_mode_ok_p[mode][regno];
1914 /* Implement TARGET_MODES_TIEABLE_P.
1916 PTImode cannot tie with other modes because PTImode is restricted to even
1917 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1918 57744).
1920 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1921 128-bit floating point on VSX systems ties with other vectors. */
1923 static bool
1924 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1926 if (mode1 == PTImode)
1927 return mode2 == PTImode;
1928 if (mode2 == PTImode)
1929 return false;
1931 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1932 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1933 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1934 return false;
1936 if (SCALAR_FLOAT_MODE_P (mode1))
1937 return SCALAR_FLOAT_MODE_P (mode2);
1938 if (SCALAR_FLOAT_MODE_P (mode2))
1939 return false;
1941 if (GET_MODE_CLASS (mode1) == MODE_CC)
1942 return GET_MODE_CLASS (mode2) == MODE_CC;
1943 if (GET_MODE_CLASS (mode2) == MODE_CC)
1944 return false;
1946 return true;
1949 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1951 static bool
1952 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1953 machine_mode mode)
1955 if (TARGET_32BIT
1956 && TARGET_POWERPC64
1957 && GET_MODE_SIZE (mode) > 4
1958 && INT_REGNO_P (regno))
1959 return true;
1961 if (TARGET_VSX
1962 && FP_REGNO_P (regno)
1963 && GET_MODE_SIZE (mode) > 8
1964 && !FLOAT128_2REG_P (mode))
1965 return true;
1967 return false;
1970 /* Print interesting facts about registers. */
1971 static void
1972 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1974 int r, m;
1976 for (r = first_regno; r <= last_regno; ++r)
1978 const char *comma = "";
1979 int len;
1981 if (first_regno == last_regno)
1982 fprintf (stderr, "%s:\t", reg_name);
1983 else
1984 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1986 len = 8;
1987 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1988 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1990 if (len > 70)
1992 fprintf (stderr, ",\n\t");
1993 len = 8;
1994 comma = "";
1997 if (rs6000_hard_regno_nregs[m][r] > 1)
1998 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
1999 rs6000_hard_regno_nregs[m][r]);
2000 else
2001 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2003 comma = ", ";
2006 if (call_used_or_fixed_reg_p (r))
2008 if (len > 70)
2010 fprintf (stderr, ",\n\t");
2011 len = 8;
2012 comma = "";
2015 len += fprintf (stderr, "%s%s", comma, "call-used");
2016 comma = ", ";
2019 if (fixed_regs[r])
2021 if (len > 70)
2023 fprintf (stderr, ",\n\t");
2024 len = 8;
2025 comma = "";
2028 len += fprintf (stderr, "%s%s", comma, "fixed");
2029 comma = ", ";
2032 if (len > 70)
2034 fprintf (stderr, ",\n\t");
2035 comma = "";
2038 len += fprintf (stderr, "%sreg-class = %s", comma,
2039 reg_class_names[(int)rs6000_regno_regclass[r]]);
2040 comma = ", ";
2042 if (len > 70)
2044 fprintf (stderr, ",\n\t");
2045 comma = "";
2048 fprintf (stderr, "%sregno = %d\n", comma, r);
2052 static const char *
2053 rs6000_debug_vector_unit (enum rs6000_vector v)
2055 const char *ret;
2057 switch (v)
2059 case VECTOR_NONE: ret = "none"; break;
2060 case VECTOR_ALTIVEC: ret = "altivec"; break;
2061 case VECTOR_VSX: ret = "vsx"; break;
2062 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2063 default: ret = "unknown"; break;
2066 return ret;
2069 /* Inner function printing just the address mask for a particular reload
2070 register class. */
2071 DEBUG_FUNCTION char *
2072 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2074 static char ret[8];
2075 char *p = ret;
2077 if ((mask & RELOAD_REG_VALID) != 0)
2078 *p++ = 'v';
2079 else if (keep_spaces)
2080 *p++ = ' ';
2082 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2083 *p++ = 'm';
2084 else if (keep_spaces)
2085 *p++ = ' ';
2087 if ((mask & RELOAD_REG_INDEXED) != 0)
2088 *p++ = 'i';
2089 else if (keep_spaces)
2090 *p++ = ' ';
2092 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2093 *p++ = 'O';
2094 else if ((mask & RELOAD_REG_OFFSET) != 0)
2095 *p++ = 'o';
2096 else if (keep_spaces)
2097 *p++ = ' ';
2099 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2100 *p++ = '+';
2101 else if (keep_spaces)
2102 *p++ = ' ';
2104 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2105 *p++ = '+';
2106 else if (keep_spaces)
2107 *p++ = ' ';
2109 if ((mask & RELOAD_REG_AND_M16) != 0)
2110 *p++ = '&';
2111 else if (keep_spaces)
2112 *p++ = ' ';
2114 *p = '\0';
2116 return ret;
2119 /* Print the address masks in a human readble fashion. */
2120 DEBUG_FUNCTION void
2121 rs6000_debug_print_mode (ssize_t m)
2123 ssize_t rc;
2124 int spaces = 0;
2126 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2127 for (rc = 0; rc < N_RELOAD_REG; rc++)
2128 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2129 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2131 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2132 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2134 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2135 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2136 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2137 spaces = 0;
2139 else
2140 spaces += sizeof (" Reload=sl") - 1;
2142 if (reg_addr[m].scalar_in_vmx_p)
2144 fprintf (stderr, "%*s Upper=y", spaces, "");
2145 spaces = 0;
2147 else
2148 spaces += sizeof (" Upper=y") - 1;
2150 if (rs6000_vector_unit[m] != VECTOR_NONE
2151 || rs6000_vector_mem[m] != VECTOR_NONE)
2153 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2154 spaces, "",
2155 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2156 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2159 fputs ("\n", stderr);
2162 #define DEBUG_FMT_ID "%-32s= "
2163 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2164 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2165 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2167 /* Print various interesting information with -mdebug=reg. */
2168 static void
2169 rs6000_debug_reg_global (void)
2171 static const char *const tf[2] = { "false", "true" };
2172 const char *nl = (const char *)0;
2173 int m;
2174 size_t m1, m2, v;
2175 char costly_num[20];
2176 char nop_num[20];
2177 char flags_buffer[40];
2178 const char *costly_str;
2179 const char *nop_str;
2180 const char *trace_str;
2181 const char *abi_str;
2182 const char *cmodel_str;
2183 struct cl_target_option cl_opts;
2185 /* Modes we want tieable information on. */
2186 static const machine_mode print_tieable_modes[] = {
2187 QImode,
2188 HImode,
2189 SImode,
2190 DImode,
2191 TImode,
2192 PTImode,
2193 SFmode,
2194 DFmode,
2195 TFmode,
2196 IFmode,
2197 KFmode,
2198 SDmode,
2199 DDmode,
2200 TDmode,
2201 V16QImode,
2202 V8HImode,
2203 V4SImode,
2204 V2DImode,
2205 V1TImode,
2206 V32QImode,
2207 V16HImode,
2208 V8SImode,
2209 V4DImode,
2210 V2TImode,
2211 V4SFmode,
2212 V2DFmode,
2213 V8SFmode,
2214 V4DFmode,
2215 CCmode,
2216 CCUNSmode,
2217 CCEQmode,
2220 /* Virtual regs we are interested in. */
2221 const static struct {
2222 int regno; /* register number. */
2223 const char *name; /* register name. */
2224 } virtual_regs[] = {
2225 { STACK_POINTER_REGNUM, "stack pointer:" },
2226 { TOC_REGNUM, "toc: " },
2227 { STATIC_CHAIN_REGNUM, "static chain: " },
2228 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2229 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2230 { ARG_POINTER_REGNUM, "arg pointer: " },
2231 { FRAME_POINTER_REGNUM, "frame pointer:" },
2232 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2233 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2234 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2235 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2236 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2237 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2238 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2239 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundry:" },
2240 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2243 fputs ("\nHard register information:\n", stderr);
2244 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2245 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2246 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2247 LAST_ALTIVEC_REGNO,
2248 "vs");
2249 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2250 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2251 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2252 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2253 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2254 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2256 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2257 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2258 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2260 fprintf (stderr,
2261 "\n"
2262 "d reg_class = %s\n"
2263 "f reg_class = %s\n"
2264 "v reg_class = %s\n"
2265 "wa reg_class = %s\n"
2266 "we reg_class = %s\n"
2267 "wr reg_class = %s\n"
2268 "wx reg_class = %s\n"
2269 "wA reg_class = %s\n"
2270 "\n",
2271 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2272 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2273 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2274 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2275 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2276 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2277 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2278 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2280 nl = "\n";
2281 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2282 rs6000_debug_print_mode (m);
2284 fputs ("\n", stderr);
2286 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2288 machine_mode mode1 = print_tieable_modes[m1];
2289 bool first_time = true;
2291 nl = (const char *)0;
2292 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2294 machine_mode mode2 = print_tieable_modes[m2];
2295 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2297 if (first_time)
2299 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2300 nl = "\n";
2301 first_time = false;
2304 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2308 if (!first_time)
2309 fputs ("\n", stderr);
2312 if (nl)
2313 fputs (nl, stderr);
2315 if (rs6000_recip_control)
2317 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2319 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2320 if (rs6000_recip_bits[m])
2322 fprintf (stderr,
2323 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2324 GET_MODE_NAME (m),
2325 (RS6000_RECIP_AUTO_RE_P (m)
2326 ? "auto"
2327 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2328 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2329 ? "auto"
2330 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2333 fputs ("\n", stderr);
2336 if (rs6000_cpu_index >= 0)
2338 const char *name = processor_target_table[rs6000_cpu_index].name;
2339 HOST_WIDE_INT flags
2340 = processor_target_table[rs6000_cpu_index].target_enable;
2342 sprintf (flags_buffer, "-mcpu=%s flags", name);
2343 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2345 else
2346 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2348 if (rs6000_tune_index >= 0)
2350 const char *name = processor_target_table[rs6000_tune_index].name;
2351 HOST_WIDE_INT flags
2352 = processor_target_table[rs6000_tune_index].target_enable;
2354 sprintf (flags_buffer, "-mtune=%s flags", name);
2355 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2357 else
2358 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2360 cl_target_option_save (&cl_opts, &global_options);
2361 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2362 rs6000_isa_flags);
2364 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2365 rs6000_isa_flags_explicit);
2367 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2368 rs6000_builtin_mask);
2370 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2372 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2373 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2375 switch (rs6000_sched_costly_dep)
2377 case max_dep_latency:
2378 costly_str = "max_dep_latency";
2379 break;
2381 case no_dep_costly:
2382 costly_str = "no_dep_costly";
2383 break;
2385 case all_deps_costly:
2386 costly_str = "all_deps_costly";
2387 break;
2389 case true_store_to_load_dep_costly:
2390 costly_str = "true_store_to_load_dep_costly";
2391 break;
2393 case store_to_load_dep_costly:
2394 costly_str = "store_to_load_dep_costly";
2395 break;
2397 default:
2398 costly_str = costly_num;
2399 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2400 break;
2403 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2405 switch (rs6000_sched_insert_nops)
2407 case sched_finish_regroup_exact:
2408 nop_str = "sched_finish_regroup_exact";
2409 break;
2411 case sched_finish_pad_groups:
2412 nop_str = "sched_finish_pad_groups";
2413 break;
2415 case sched_finish_none:
2416 nop_str = "sched_finish_none";
2417 break;
2419 default:
2420 nop_str = nop_num;
2421 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2422 break;
2425 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2427 switch (rs6000_sdata)
2429 default:
2430 case SDATA_NONE:
2431 break;
2433 case SDATA_DATA:
2434 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2435 break;
2437 case SDATA_SYSV:
2438 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2439 break;
2441 case SDATA_EABI:
2442 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2443 break;
2447 switch (rs6000_traceback)
2449 case traceback_default: trace_str = "default"; break;
2450 case traceback_none: trace_str = "none"; break;
2451 case traceback_part: trace_str = "part"; break;
2452 case traceback_full: trace_str = "full"; break;
2453 default: trace_str = "unknown"; break;
2456 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2458 switch (rs6000_current_cmodel)
2460 case CMODEL_SMALL: cmodel_str = "small"; break;
2461 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2462 case CMODEL_LARGE: cmodel_str = "large"; break;
2463 default: cmodel_str = "unknown"; break;
2466 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2468 switch (rs6000_current_abi)
2470 case ABI_NONE: abi_str = "none"; break;
2471 case ABI_AIX: abi_str = "aix"; break;
2472 case ABI_ELFv2: abi_str = "ELFv2"; break;
2473 case ABI_V4: abi_str = "V4"; break;
2474 case ABI_DARWIN: abi_str = "darwin"; break;
2475 default: abi_str = "unknown"; break;
2478 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2480 if (rs6000_altivec_abi)
2481 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2483 if (rs6000_darwin64_abi)
2484 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2486 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2487 (TARGET_SOFT_FLOAT ? "true" : "false"));
2489 if (TARGET_LINK_STACK)
2490 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2492 if (TARGET_P8_FUSION)
2494 char options[80];
2496 strcpy (options, "power8");
2497 if (TARGET_P8_FUSION_SIGN)
2498 strcat (options, ", sign");
2500 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2503 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2504 TARGET_SECURE_PLT ? "secure" : "bss");
2505 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2506 aix_struct_return ? "aix" : "sysv");
2507 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2508 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2509 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2510 tf[!!rs6000_align_branch_targets]);
2511 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2512 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2513 rs6000_long_double_type_size);
2514 if (rs6000_long_double_type_size > 64)
2516 fprintf (stderr, DEBUG_FMT_S, "long double type",
2517 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2518 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2519 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2521 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2522 (int)rs6000_sched_restricted_insns_priority);
2523 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2524 (int)END_BUILTINS);
2525 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2526 (int)RS6000_BUILTIN_COUNT);
2528 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2529 (int)TARGET_FLOAT128_ENABLE_TYPE);
2531 if (TARGET_VSX)
2532 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2533 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2535 if (TARGET_DIRECT_MOVE_128)
2536 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2537 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2541 /* Update the addr mask bits in reg_addr to help secondary reload and go if
2542 legitimate address support to figure out the appropriate addressing to
2543 use. */
2545 static void
2546 rs6000_setup_reg_addr_masks (void)
2548 ssize_t rc, reg, m, nregs;
2549 addr_mask_type any_addr_mask, addr_mask;
2551 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2553 machine_mode m2 = (machine_mode) m;
2554 bool complex_p = false;
2555 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2556 size_t msize;
2558 if (COMPLEX_MODE_P (m2))
2560 complex_p = true;
2561 m2 = GET_MODE_INNER (m2);
2564 msize = GET_MODE_SIZE (m2);
2566 /* SDmode is special in that we want to access it only via REG+REG
2567 addressing on power7 and above, since we want to use the LFIWZX and
2568 STFIWZX instructions to load it. */
2569 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2571 any_addr_mask = 0;
2572 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2574 addr_mask = 0;
2575 reg = reload_reg_map[rc].reg;
2577 /* Can mode values go in the GPR/FPR/Altivec registers? */
2578 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2580 bool small_int_vsx_p = (small_int_p
2581 && (rc == RELOAD_REG_FPR
2582 || rc == RELOAD_REG_VMX));
2584 nregs = rs6000_hard_regno_nregs[m][reg];
2585 addr_mask |= RELOAD_REG_VALID;
2587 /* Indicate if the mode takes more than 1 physical register. If
2588 it takes a single register, indicate it can do REG+REG
2589 addressing. Small integers in VSX registers can only do
2590 REG+REG addressing. */
2591 if (small_int_vsx_p)
2592 addr_mask |= RELOAD_REG_INDEXED;
2593 else if (nregs > 1 || m == BLKmode || complex_p)
2594 addr_mask |= RELOAD_REG_MULTIPLE;
2595 else
2596 addr_mask |= RELOAD_REG_INDEXED;
2598 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2599 addressing. If we allow scalars into Altivec registers,
2600 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2602 For VSX systems, we don't allow update addressing for
2603 DFmode/SFmode if those registers can go in both the
2604 traditional floating point registers and Altivec registers.
2605 The load/store instructions for the Altivec registers do not
2606 have update forms. If we allowed update addressing, it seems
2607 to break IV-OPT code using floating point if the index type is
2608 int instead of long (PR target/81550 and target/84042). */
2610 if (TARGET_UPDATE
2611 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2612 && msize <= 8
2613 && !VECTOR_MODE_P (m2)
2614 && !FLOAT128_VECTOR_P (m2)
2615 && !complex_p
2616 && (m != E_DFmode || !TARGET_VSX)
2617 && (m != E_SFmode || !TARGET_P8_VECTOR)
2618 && !small_int_vsx_p)
2620 addr_mask |= RELOAD_REG_PRE_INCDEC;
2622 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2623 we don't allow PRE_MODIFY for some multi-register
2624 operations. */
2625 switch (m)
2627 default:
2628 addr_mask |= RELOAD_REG_PRE_MODIFY;
2629 break;
2631 case E_DImode:
2632 if (TARGET_POWERPC64)
2633 addr_mask |= RELOAD_REG_PRE_MODIFY;
2634 break;
2636 case E_DFmode:
2637 case E_DDmode:
2638 if (TARGET_HARD_FLOAT)
2639 addr_mask |= RELOAD_REG_PRE_MODIFY;
2640 break;
2645 /* GPR and FPR registers can do REG+OFFSET addressing, except
2646 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2647 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2648 if ((addr_mask != 0) && !indexed_only_p
2649 && msize <= 8
2650 && (rc == RELOAD_REG_GPR
2651 || ((msize == 8 || m2 == SFmode)
2652 && (rc == RELOAD_REG_FPR
2653 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2654 addr_mask |= RELOAD_REG_OFFSET;
2656 /* VSX registers can do REG+OFFSET addresssing if ISA 3.0
2657 instructions are enabled. The offset for 128-bit VSX registers is
2658 only 12-bits. While GPRs can handle the full offset range, VSX
2659 registers can only handle the restricted range. */
2660 else if ((addr_mask != 0) && !indexed_only_p
2661 && msize == 16 && TARGET_P9_VECTOR
2662 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2663 || (m2 == TImode && TARGET_VSX)))
2665 addr_mask |= RELOAD_REG_OFFSET;
2666 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2667 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2670 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2671 addressing on 128-bit types. */
2672 if (rc == RELOAD_REG_VMX && msize == 16
2673 && (addr_mask & RELOAD_REG_VALID) != 0)
2674 addr_mask |= RELOAD_REG_AND_M16;
2676 reg_addr[m].addr_mask[rc] = addr_mask;
2677 any_addr_mask |= addr_mask;
2680 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2685 /* Initialize the various global tables that are based on register size. */
2686 static void
2687 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2689 ssize_t r, m, c;
2690 int align64;
2691 int align32;
2693 /* Precalculate REGNO_REG_CLASS. */
2694 rs6000_regno_regclass[0] = GENERAL_REGS;
2695 for (r = 1; r < 32; ++r)
2696 rs6000_regno_regclass[r] = BASE_REGS;
2698 for (r = 32; r < 64; ++r)
2699 rs6000_regno_regclass[r] = FLOAT_REGS;
2701 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2702 rs6000_regno_regclass[r] = NO_REGS;
2704 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2705 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2707 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2708 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2709 rs6000_regno_regclass[r] = CR_REGS;
2711 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2712 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2713 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2714 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2715 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2716 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2717 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2719 /* Precalculate register class to simpler reload register class. We don't
2720 need all of the register classes that are combinations of different
2721 classes, just the simple ones that have constraint letters. */
2722 for (c = 0; c < N_REG_CLASSES; c++)
2723 reg_class_to_reg_type[c] = NO_REG_TYPE;
2725 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2726 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2727 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2728 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2729 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2730 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2731 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2732 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2733 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2734 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2736 if (TARGET_VSX)
2738 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2739 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2741 else
2743 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2744 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2747 /* Precalculate the valid memory formats as well as the vector information,
2748 this must be set up before the rs6000_hard_regno_nregs_internal calls
2749 below. */
2750 gcc_assert ((int)VECTOR_NONE == 0);
2751 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2752 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2754 gcc_assert ((int)CODE_FOR_nothing == 0);
2755 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2757 gcc_assert ((int)NO_REGS == 0);
2758 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2760 /* The VSX hardware allows native alignment for vectors, but control whether the compiler
2761 believes it can use native alignment or still uses 128-bit alignment. */
2762 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2764 align64 = 64;
2765 align32 = 32;
2767 else
2769 align64 = 128;
2770 align32 = 128;
2773 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2774 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2775 if (TARGET_FLOAT128_TYPE)
2777 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2778 rs6000_vector_align[KFmode] = 128;
2780 if (FLOAT128_IEEE_P (TFmode))
2782 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2783 rs6000_vector_align[TFmode] = 128;
2787 /* V2DF mode, VSX only. */
2788 if (TARGET_VSX)
2790 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2791 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2792 rs6000_vector_align[V2DFmode] = align64;
2795 /* V4SF mode, either VSX or Altivec. */
2796 if (TARGET_VSX)
2798 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2799 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2800 rs6000_vector_align[V4SFmode] = align32;
2802 else if (TARGET_ALTIVEC)
2804 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2805 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2806 rs6000_vector_align[V4SFmode] = align32;
2809 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2810 and stores. */
2811 if (TARGET_ALTIVEC)
2813 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2814 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2815 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2816 rs6000_vector_align[V4SImode] = align32;
2817 rs6000_vector_align[V8HImode] = align32;
2818 rs6000_vector_align[V16QImode] = align32;
2820 if (TARGET_VSX)
2822 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2823 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2824 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2826 else
2828 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2829 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2830 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2834 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2835 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2836 if (TARGET_VSX)
2838 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2839 rs6000_vector_unit[V2DImode]
2840 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2841 rs6000_vector_align[V2DImode] = align64;
2843 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2844 rs6000_vector_unit[V1TImode]
2845 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2846 rs6000_vector_align[V1TImode] = 128;
2849 /* DFmode, see if we want to use the VSX unit. Memory is handled
2850 differently, so don't set rs6000_vector_mem. */
2851 if (TARGET_VSX)
2853 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2854 rs6000_vector_align[DFmode] = 64;
2857 /* SFmode, see if we want to use the VSX unit. */
2858 if (TARGET_P8_VECTOR)
2860 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2861 rs6000_vector_align[SFmode] = 32;
2864 /* Allow TImode in VSX register and set the VSX memory macros. */
2865 if (TARGET_VSX)
2867 rs6000_vector_mem[TImode] = VECTOR_VSX;
2868 rs6000_vector_align[TImode] = align64;
2871 /* Register class constraints for the constraints that depend on compile
2872 switches. When the VSX code was added, different constraints were added
2873 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2874 of the VSX registers are used. The register classes for scalar floating
2875 point types is set, based on whether we allow that type into the upper
2876 (Altivec) registers. GCC has register classes to target the Altivec
2877 registers for load/store operations, to select using a VSX memory
2878 operation instead of the traditional floating point operation. The
2879 constraints are:
2881 d - Register class to use with traditional DFmode instructions.
2882 f - Register class to use with traditional SFmode instructions.
2883 v - Altivec register.
2884 wa - Any VSX register.
2885 wc - Reserved to represent individual CR bits (used in LLVM).
2886 wn - always NO_REGS.
2887 wr - GPR if 64-bit mode is permitted.
2888 wx - Float register if we can do 32-bit int stores. */
2890 if (TARGET_HARD_FLOAT)
2892 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2893 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2896 if (TARGET_VSX)
2897 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2899 /* Add conditional constraints based on various options, to allow us to
2900 collapse multiple insn patterns. */
2901 if (TARGET_ALTIVEC)
2902 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2904 if (TARGET_POWERPC64)
2906 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2907 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2910 if (TARGET_STFIWX)
2911 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2913 /* Support for new direct moves (ISA 3.0 + 64bit). */
2914 if (TARGET_DIRECT_MOVE_128)
2915 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2917 /* Set up the reload helper and direct move functions. */
2918 if (TARGET_VSX || TARGET_ALTIVEC)
2920 if (TARGET_64BIT)
2922 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2923 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2924 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2925 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2926 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2927 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2928 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2929 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2930 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2931 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2932 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2933 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2934 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2935 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2936 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2937 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2938 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2939 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2940 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2941 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2943 if (FLOAT128_VECTOR_P (KFmode))
2945 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
2946 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
2949 if (FLOAT128_VECTOR_P (TFmode))
2951 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
2952 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
2955 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
2956 available. */
2957 if (TARGET_NO_SDMODE_STACK)
2959 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2960 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2963 if (TARGET_VSX)
2965 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2966 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2969 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
2971 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2972 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2973 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2974 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2975 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2976 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2977 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2978 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2979 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2981 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2982 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2983 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2984 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2985 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2986 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2987 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2988 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2989 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2991 if (FLOAT128_VECTOR_P (KFmode))
2993 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
2994 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
2997 if (FLOAT128_VECTOR_P (TFmode))
2999 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3000 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3004 else
3006 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3007 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3008 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3009 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3010 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3011 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3012 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3013 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3014 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3015 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3016 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3017 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3018 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3019 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3020 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3021 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3022 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3023 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3024 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3025 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3027 if (FLOAT128_VECTOR_P (KFmode))
3029 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3030 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3033 if (FLOAT128_IEEE_P (TFmode))
3035 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3036 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3039 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3040 available. */
3041 if (TARGET_NO_SDMODE_STACK)
3043 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3044 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3047 if (TARGET_VSX)
3049 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3050 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3053 if (TARGET_DIRECT_MOVE)
3055 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3056 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3057 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3061 reg_addr[DFmode].scalar_in_vmx_p = true;
3062 reg_addr[DImode].scalar_in_vmx_p = true;
3064 if (TARGET_P8_VECTOR)
3066 reg_addr[SFmode].scalar_in_vmx_p = true;
3067 reg_addr[SImode].scalar_in_vmx_p = true;
3069 if (TARGET_P9_VECTOR)
3071 reg_addr[HImode].scalar_in_vmx_p = true;
3072 reg_addr[QImode].scalar_in_vmx_p = true;
3077 /* Precalculate HARD_REGNO_NREGS. */
3078 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3079 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3080 rs6000_hard_regno_nregs[m][r]
3081 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3083 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3084 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3085 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3086 rs6000_hard_regno_mode_ok_p[m][r]
3087 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3089 /* Precalculate CLASS_MAX_NREGS sizes. */
3090 for (c = 0; c < LIM_REG_CLASSES; ++c)
3092 int reg_size;
3094 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3095 reg_size = UNITS_PER_VSX_WORD;
3097 else if (c == ALTIVEC_REGS)
3098 reg_size = UNITS_PER_ALTIVEC_WORD;
3100 else if (c == FLOAT_REGS)
3101 reg_size = UNITS_PER_FP_WORD;
3103 else
3104 reg_size = UNITS_PER_WORD;
3106 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3108 machine_mode m2 = (machine_mode)m;
3109 int reg_size2 = reg_size;
3111 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3112 in VSX. */
3113 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3114 reg_size2 = UNITS_PER_FP_WORD;
3116 rs6000_class_max_nregs[m][c]
3117 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3121 /* Calculate which modes to automatically generate code to use a the
3122 reciprocal divide and square root instructions. In the future, possibly
3123 automatically generate the instructions even if the user did not specify
3124 -mrecip. The older machines double precision reciprocal sqrt estimate is
3125 not accurate enough. */
3126 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3127 if (TARGET_FRES)
3128 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3129 if (TARGET_FRE)
3130 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3131 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3132 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3133 if (VECTOR_UNIT_VSX_P (V2DFmode))
3134 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3136 if (TARGET_FRSQRTES)
3137 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3138 if (TARGET_FRSQRTE)
3139 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3140 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3141 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3142 if (VECTOR_UNIT_VSX_P (V2DFmode))
3143 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3145 if (rs6000_recip_control)
3147 if (!flag_finite_math_only)
3148 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3149 "-ffast-math");
3150 if (flag_trapping_math)
3151 warning (0, "%qs requires %qs or %qs", "-mrecip",
3152 "-fno-trapping-math", "-ffast-math");
3153 if (!flag_reciprocal_math)
3154 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3155 "-ffast-math");
3156 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3158 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3159 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3160 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3162 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3163 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3164 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3166 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3167 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3168 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3170 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3171 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3172 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3174 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3175 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3176 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3178 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3179 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3180 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3182 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3183 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3184 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3186 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3187 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3188 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3192 /* Update the addr mask bits in reg_addr to help secondary reload and go if
3193 legitimate address support to figure out the appropriate addressing to
3194 use. */
3195 rs6000_setup_reg_addr_masks ();
3197 if (global_init_p || TARGET_DEBUG_TARGET)
3199 if (TARGET_DEBUG_REG)
3200 rs6000_debug_reg_global ();
3202 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3203 fprintf (stderr,
3204 "SImode variable mult cost = %d\n"
3205 "SImode constant mult cost = %d\n"
3206 "SImode short constant mult cost = %d\n"
3207 "DImode multipliciation cost = %d\n"
3208 "SImode division cost = %d\n"
3209 "DImode division cost = %d\n"
3210 "Simple fp operation cost = %d\n"
3211 "DFmode multiplication cost = %d\n"
3212 "SFmode division cost = %d\n"
3213 "DFmode division cost = %d\n"
3214 "cache line size = %d\n"
3215 "l1 cache size = %d\n"
3216 "l2 cache size = %d\n"
3217 "simultaneous prefetches = %d\n"
3218 "\n",
3219 rs6000_cost->mulsi,
3220 rs6000_cost->mulsi_const,
3221 rs6000_cost->mulsi_const9,
3222 rs6000_cost->muldi,
3223 rs6000_cost->divsi,
3224 rs6000_cost->divdi,
3225 rs6000_cost->fp,
3226 rs6000_cost->dmul,
3227 rs6000_cost->sdiv,
3228 rs6000_cost->ddiv,
3229 rs6000_cost->cache_line_size,
3230 rs6000_cost->l1_cache_size,
3231 rs6000_cost->l2_cache_size,
3232 rs6000_cost->simultaneous_prefetches);
3236 #if TARGET_MACHO
3237 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
static void
darwin_rs6000_override_options (void)
{
  /* The Darwin ABI always includes AltiVec, can't be (validly) turned
     off.  */
  rs6000_altivec_abi = 1;
  TARGET_ALTIVEC_VRSAVE = 1;
  rs6000_current_abi = ABI_DARWIN;

  /* Darwin/64 uses one-byte bool, matching the system C++ ABI.  */
  if (DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)
      darwin_one_byte_bool = 1;

  /* -m64 implies a 64-bit capable processor; force PowerPC64 on and tell
     the user we did so.  */
  if (TARGET_64BIT && ! TARGET_POWERPC64)
    {
      rs6000_isa_flags |= OPTION_MASK_POWERPC64;
      warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
    }

  /* The linkers [ld64] that support 64Bit do not need the JBSR longcall
     optimisation, and will not work with the most generic case (where the
     symbol is undefined external, but there is no symbol stub).  */
  if (TARGET_64BIT)
    rs6000_default_long_calls = 0;

  /* ld_classic is (so far) still used for kernel (static) code, and supports
     the JBSR longcall / branch islands.  */
  if (flag_mkernel)
    {
      rs6000_default_long_calls = 1;

      /* Allow a kext author to do -mkernel -mhard-float: only default to
	 soft float when the user did not say otherwise.  */
      if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
	rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
    }

  /* Make -m64 imply -maltivec.  Darwin's 64-bit ABI includes
     Altivec.  */
  if (!flag_mkernel && !flag_apple_kext
      && TARGET_64BIT
      && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
    rs6000_isa_flags |= OPTION_MASK_ALTIVEC;

  /* Unless the user (not the configurer) has explicitly overridden
     it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
     G4 unless targeting the kernel.  */
  if (!flag_mkernel
      && !flag_apple_kext
      && strverscmp (darwin_macosx_version_min, "10.5") >= 0
      && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
      && ! global_options_set.x_rs6000_cpu_index)
    rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
}
3294 #endif
3296 /* If not otherwise specified by a target, make 'long double' equivalent to
3297 'double'. */
3299 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3300 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3301 #endif
3303 /* Return the builtin mask of the various options used that could affect which
3304 builtins were used. In the past we used target_flags, but we've run out of
3305 bits, and some options are no longer in target_flags. */
3307 HOST_WIDE_INT
3308 rs6000_builtin_mask_calculate (void)
3310 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3311 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3312 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3313 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3314 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3315 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3316 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3317 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3318 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3319 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3320 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3321 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3322 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3323 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3324 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3325 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3326 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3327 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3328 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3329 | ((TARGET_LONG_DOUBLE_128
3330 && TARGET_HARD_FLOAT
3331 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3332 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3333 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
3336 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3337 to clobber the XER[CA] bit because clobbering that bit without telling
3338 the compiler worked just fine with versions of GCC before GCC 5, and
3339 breaking a lot of older code in ways that are hard to track down is
3340 not such a great idea. */
3342 static rtx_insn *
3343 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3344 vec<const char *> &/*constraints*/,
3345 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3347 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3348 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3349 return NULL;
3352 /* Override command line options.
3354 Combine build-specific configuration information with options
3355 specified on the command line to set various state variables which
3356 influence code generation, optimization, and expansion of built-in
3357 functions. Assure that command-line configuration preferences are
3358 compatible with each other and with the build configuration; issue
3359 warnings while adjusting configuration or error messages while
3360 rejecting configuration.
3362 Upon entry to this function:
3364 This function is called once at the beginning of
3365 compilation, and then again at the start and end of compiling
3366 each section of code that has a different configuration, as
3367 indicated, for example, by adding the
3369 __attribute__((__target__("cpu=power9")))
3371 qualifier to a function definition or, for example, by bracketing
3372 code between
3374 #pragma GCC target("altivec")
3378 #pragma GCC reset_options
3380 directives. Parameter global_init_p is true for the initial
3381 invocation, which initializes global variables, and false for all
3382 subsequent invocations.
3385 Various global state information is assumed to be valid. This
3386 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3387 default CPU specified at build configure time, TARGET_DEFAULT,
3388 representing the default set of option flags for the default
3389 target, and global_options_set.x_rs6000_isa_flags, representing
3390 which options were requested on the command line.
3392 Upon return from this function:
3394 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3395 was set by name on the command line. Additionally, if certain
3396 attributes are automatically enabled or disabled by this function
3397 in order to assure compatibility between options and
3398 configuration, the flags associated with those attributes are
3399 also set. By setting these "explicit bits", we avoid the risk
3400 that other code might accidentally overwrite these particular
3401 attributes with "default values".
3403 The various bits of rs6000_isa_flags are set to indicate the
3404 target options that have been selected for the most current
3405 compilation efforts. This has the effect of also turning on the
3406 associated TARGET_XXX values since these are macros which are
3407 generally defined to test the corresponding bit of the
3408 rs6000_isa_flags variable.
3410 The variable rs6000_builtin_mask is set to represent the target
3411 options for the most current compilation efforts, consistent with
3412 the current contents of rs6000_isa_flags. This variable controls
3413 expansion of built-in functions.
3415 Various other global variables and fields of global structures
3416 (over 50 in all) are initialized to reflect the desired options
3417 for the most current compilation efforts. */
3419 static bool
3420 rs6000_option_override_internal (bool global_init_p)
3422 bool ret = true;
3424 HOST_WIDE_INT set_masks;
3425 HOST_WIDE_INT ignore_masks;
3426 int cpu_index = -1;
3427 int tune_index;
3428 struct cl_target_option *main_target_opt
3429 = ((global_init_p || target_option_default_node == NULL)
3430 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3432 /* Print defaults. */
3433 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3434 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3436 /* Remember the explicit arguments. */
3437 if (global_init_p)
3438 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3440 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3441 library functions, so warn about it. The flag may be useful for
3442 performance studies from time to time though, so don't disable it
3443 entirely. */
3444 if (global_options_set.x_rs6000_alignment_flags
3445 && rs6000_alignment_flags == MASK_ALIGN_POWER
3446 && DEFAULT_ABI == ABI_DARWIN
3447 && TARGET_64BIT)
3448 warning (0, "%qs is not supported for 64-bit Darwin;"
3449 " it is incompatible with the installed C and C++ libraries",
3450 "-malign-power");
3452 /* Numerous experiment shows that IRA based loop pressure
3453 calculation works better for RTL loop invariant motion on targets
3454 with enough (>= 32) registers. It is an expensive optimization.
3455 So it is on only for peak performance. */
3456 if (optimize >= 3 && global_init_p
3457 && !global_options_set.x_flag_ira_loop_pressure)
3458 flag_ira_loop_pressure = 1;
3460 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3461 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3462 options were already specified. */
3463 if (flag_sanitize & SANITIZE_USER_ADDRESS
3464 && !global_options_set.x_flag_asynchronous_unwind_tables)
3465 flag_asynchronous_unwind_tables = 1;
3467 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3468 loop unroller is active. It is only checked during unrolling, so
3469 we can just set it on by default. */
3470 if (!global_options_set.x_flag_variable_expansion_in_unroller)
3471 flag_variable_expansion_in_unroller = 1;
3473 /* Set the pointer size. */
3474 if (TARGET_64BIT)
3476 rs6000_pmode = DImode;
3477 rs6000_pointer_size = 64;
3479 else
3481 rs6000_pmode = SImode;
3482 rs6000_pointer_size = 32;
3485 /* Some OSs don't support saving the high part of 64-bit registers on context
3486 switch. Other OSs don't support saving Altivec registers. On those OSs,
3487 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3488 if the user wants either, the user must explicitly specify them and we
3489 won't interfere with the user's specification. */
3491 set_masks = POWERPC_MASKS;
3492 #ifdef OS_MISSING_POWERPC64
3493 if (OS_MISSING_POWERPC64)
3494 set_masks &= ~OPTION_MASK_POWERPC64;
3495 #endif
3496 #ifdef OS_MISSING_ALTIVEC
3497 if (OS_MISSING_ALTIVEC)
3498 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3499 | OTHER_VSX_VECTOR_MASKS);
3500 #endif
3502 /* Don't override by the processor default if given explicitly. */
3503 set_masks &= ~rs6000_isa_flags_explicit;
3505 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3506 the cpu in a target attribute or pragma, but did not specify a tuning
3507 option, use the cpu for the tuning option rather than the option specified
3508 with -mtune on the command line. Process a '--with-cpu' configuration
3509 request as an implicit --cpu. */
3510 if (rs6000_cpu_index >= 0)
3511 cpu_index = rs6000_cpu_index;
3512 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3513 cpu_index = main_target_opt->x_rs6000_cpu_index;
3514 else if (OPTION_TARGET_CPU_DEFAULT)
3515 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3517 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3518 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3519 with those from the cpu, except for options that were explicitly set. If
3520 we don't have a cpu, do not override the target bits set in
3521 TARGET_DEFAULT. */
3522 if (cpu_index >= 0)
3524 rs6000_cpu_index = cpu_index;
3525 rs6000_isa_flags &= ~set_masks;
3526 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3527 & set_masks);
3529 else
3531 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3532 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3533 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3534 to using rs6000_isa_flags, we need to do the initialization here.
3536 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3537 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3538 HOST_WIDE_INT flags;
3539 if (TARGET_DEFAULT)
3540 flags = TARGET_DEFAULT;
3541 else
3543 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3544 const char *default_cpu = (!TARGET_POWERPC64
3545 ? "powerpc"
3546 : (BYTES_BIG_ENDIAN
3547 ? "powerpc64"
3548 : "powerpc64le"));
3549 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3550 flags = processor_target_table[default_cpu_index].target_enable;
3552 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3555 if (rs6000_tune_index >= 0)
3556 tune_index = rs6000_tune_index;
3557 else if (cpu_index >= 0)
3558 rs6000_tune_index = tune_index = cpu_index;
3559 else
3561 size_t i;
3562 enum processor_type tune_proc
3563 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3565 tune_index = -1;
3566 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3567 if (processor_target_table[i].processor == tune_proc)
3569 tune_index = i;
3570 break;
3574 if (cpu_index >= 0)
3575 rs6000_cpu = processor_target_table[cpu_index].processor;
3576 else
3577 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3579 gcc_assert (tune_index >= 0);
3580 rs6000_tune = processor_target_table[tune_index].processor;
3582 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3583 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3584 || rs6000_cpu == PROCESSOR_PPCE5500)
3586 if (TARGET_ALTIVEC)
3587 error ("AltiVec not supported in this target");
3590 /* If we are optimizing big endian systems for space, use the load/store
3591 multiple instructions. */
3592 if (BYTES_BIG_ENDIAN && optimize_size)
3593 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3595 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3596 because the hardware doesn't support the instructions used in little
3597 endian mode, and causes an alignment trap. The 750 does not cause an
3598 alignment trap (except when the target is unaligned). */
3600 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3602 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3603 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3604 warning (0, "%qs is not supported on little endian systems",
3605 "-mmultiple");
3608 /* If little-endian, default to -mstrict-align on older processors.
3609 Testing for htm matches power8 and later. */
3610 if (!BYTES_BIG_ENDIAN
3611 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3612 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3614 if (!rs6000_fold_gimple)
3615 fprintf (stderr,
3616 "gimple folding of rs6000 builtins has been disabled.\n");
3618 /* Add some warnings for VSX. */
3619 if (TARGET_VSX)
3621 const char *msg = NULL;
3622 if (!TARGET_HARD_FLOAT)
3624 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3625 msg = N_("%<-mvsx%> requires hardware floating point");
3626 else
3628 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3629 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3632 else if (TARGET_AVOID_XFORM > 0)
3633 msg = N_("%<-mvsx%> needs indexed addressing");
3634 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3635 & OPTION_MASK_ALTIVEC))
3637 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3638 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3639 else
3640 msg = N_("%<-mno-altivec%> disables vsx");
3643 if (msg)
3645 warning (0, msg);
3646 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3647 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3651 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3652 the -mcpu setting to enable options that conflict. */
3653 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3654 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3655 | OPTION_MASK_ALTIVEC
3656 | OPTION_MASK_VSX)) != 0)
3657 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3658 | OPTION_MASK_DIRECT_MOVE)
3659 & ~rs6000_isa_flags_explicit);
3661 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3662 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3664 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3665 off all of the options that depend on those flags. */
3666 ignore_masks = rs6000_disable_incompatible_switches ();
3668 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3669 unless the user explicitly used the -mno-<option> to disable the code. */
3670 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3671 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3672 else if (TARGET_P9_MINMAX)
3674 if (cpu_index >= 0)
3676 if (cpu_index == PROCESSOR_POWER9)
3678 /* legacy behavior: allow -mcpu=power9 with certain
3679 capabilities explicitly disabled. */
3680 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3682 else
3683 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3684 "for <xxx> less than power9", "-mcpu");
3686 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3687 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3688 & rs6000_isa_flags_explicit))
3689 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3690 were explicitly cleared. */
3691 error ("%qs incompatible with explicitly disabled options",
3692 "-mpower9-minmax");
3693 else
3694 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3696 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3697 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3698 else if (TARGET_VSX)
3699 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3700 else if (TARGET_POPCNTD)
3701 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3702 else if (TARGET_DFP)
3703 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3704 else if (TARGET_CMPB)
3705 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3706 else if (TARGET_FPRND)
3707 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3708 else if (TARGET_POPCNTB)
3709 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3710 else if (TARGET_ALTIVEC)
3711 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3713 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3715 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3716 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3717 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3720 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3722 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3723 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3724 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3727 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3729 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3730 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3731 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3734 if (TARGET_P8_VECTOR && !TARGET_VSX)
3736 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3737 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3738 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3739 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3741 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3742 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3743 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3745 else
3747 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3748 not explicit. */
3749 rs6000_isa_flags |= OPTION_MASK_VSX;
3750 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3754 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3756 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3757 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3758 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3761 /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
3762 silently turn off quad memory mode. */
3763 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3765 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3766 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3768 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3769 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3771 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3772 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3775 /* Non-atomic quad memory load/store are disabled for little endian, since
3776 the words are reversed, but atomic operations can still be done by
3777 swapping the words. */
3778 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3780 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3781 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3782 "mode"));
3784 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3787 /* Assume if the user asked for normal quad memory instructions, they want
3788 the atomic versions as well, unless they explicitly told us not to use quad
3789 word atomic instructions. */
3790 if (TARGET_QUAD_MEMORY
3791 && !TARGET_QUAD_MEMORY_ATOMIC
3792 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3793 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3795 /* If we can shrink-wrap the TOC register save separately, then use
3796 -msave-toc-indirect unless explicitly disabled. */
3797 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
3798 && flag_shrink_wrap_separate
3799 && optimize_function_for_speed_p (cfun))
3800 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
3802 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3803 generating power8 instructions. Power9 does not optimize power8 fusion
3804 cases. */
3805 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3807 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
3808 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3809 else
3810 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3813 /* Setting additional fusion flags turns on base fusion. */
3814 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
3816 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
3818 if (TARGET_P8_FUSION_SIGN)
3819 error ("%qs requires %qs", "-mpower8-fusion-sign",
3820 "-mpower8-fusion");
3822 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3824 else
3825 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3828 /* Power8 does not fuse sign extended loads with the addis. If we are
3829 optimizing at high levels for speed, convert a sign extended load into a
3830 zero extending load, and an explicit sign extension. */
3831 if (TARGET_P8_FUSION
3832 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3833 && optimize_function_for_speed_p (cfun)
3834 && optimize >= 3)
3835 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
3837 /* ISA 3.0 vector instructions include ISA 2.07. */
3838 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
3840 /* We prefer to not mention undocumented options in
3841 error messages. However, if users have managed to select
3842 power9-vector without selecting power8-vector, they
3843 already know about undocumented flags. */
3844 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) &&
3845 (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
3846 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
3847 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
3849 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
3850 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3851 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
3853 else
3855 /* OPTION_MASK_P9_VECTOR is explicit and
3856 OPTION_MASK_P8_VECTOR is not explicit. */
3857 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
3858 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3862 /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
3863 support. If we only have ISA 2.06 support, and the user did not specify
3864 the switch, leave it set to -1 so the movmisalign patterns are enabled,
3865 but we don't enable the full vectorization support */
3866 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
3867 TARGET_ALLOW_MOVMISALIGN = 1;
3869 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
3871 if (TARGET_ALLOW_MOVMISALIGN > 0
3872 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
3873 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
3875 TARGET_ALLOW_MOVMISALIGN = 0;
3878 /* Determine when unaligned vector accesses are permitted, and when
3879 they are preferred over masked Altivec loads. Note that if
3880 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
3881 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
3882 not true. */
3883 if (TARGET_EFFICIENT_UNALIGNED_VSX)
3885 if (!TARGET_VSX)
3887 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3888 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
3890 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3893 else if (!TARGET_ALLOW_MOVMISALIGN)
3895 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3896 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
3897 "-mallow-movmisalign");
3899 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3903 /* Use long double size to select the appropriate long double. We use
3904 TYPE_PRECISION to differentiate the 3 different long double types. We map
3905 128 into the precision used for TFmode. */
3906 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
3907 ? 64
3908 : FLOAT_PRECISION_TFmode);
3910 /* Set long double size before the IEEE 128-bit tests. */
3911 if (!global_options_set.x_rs6000_long_double_type_size)
3913 if (main_target_opt != NULL
3914 && (main_target_opt->x_rs6000_long_double_type_size
3915 != default_long_double_size))
3916 error ("target attribute or pragma changes %<long double%> size");
3917 else
3918 rs6000_long_double_type_size = default_long_double_size;
3920 else if (rs6000_long_double_type_size == 128)
3921 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
3922 else if (global_options_set.x_rs6000_ieeequad)
3924 if (global_options.x_rs6000_ieeequad)
3925 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
3926 else
3927 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
3930 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
3931 systems will also set long double to be IEEE 128-bit. AIX and Darwin
3932 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
3933 those systems will not pick up this default. Warn if the user changes the
3934 default unless -Wno-psabi. */
3935 if (!global_options_set.x_rs6000_ieeequad)
3936 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
3938 else
3940 if (global_options.x_rs6000_ieeequad
3941 && (!TARGET_POPCNTD || !TARGET_VSX))
3942 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
3944 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
3946 static bool warned_change_long_double;
3947 if (!warned_change_long_double)
3949 warned_change_long_double = true;
3950 if (TARGET_IEEEQUAD)
3951 warning (OPT_Wpsabi, "Using IEEE extended precision "
3952 "%<long double%>");
3953 else
3954 warning (OPT_Wpsabi, "Using IBM extended precision "
3955 "%<long double%>");
3960 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
3961 systems. In GCC 7, we would enable the IEEE 128-bit floating point
3962 infrastructure (-mfloat128-type) but not enable the actual __float128 type
3963 unless the user used the explicit -mfloat128. In GCC 8, we enable both
3964 the keyword as well as the type. */
3965 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
3967 /* IEEE 128-bit floating point requires VSX support. */
3968 if (TARGET_FLOAT128_KEYWORD)
3970 if (!TARGET_VSX)
3972 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
3973 error ("%qs requires VSX support", "%<-mfloat128%>");
3975 TARGET_FLOAT128_TYPE = 0;
3976 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
3977 | OPTION_MASK_FLOAT128_HW);
3979 else if (!TARGET_FLOAT128_TYPE)
3981 TARGET_FLOAT128_TYPE = 1;
3982 warning (0, "The %<-mfloat128%> option may not be fully supported");
3986 /* Enable the __float128 keyword under Linux by default. */
3987 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
3988 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
3989 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
3991 /* If we are supporting the float128 type and full ISA 3.0 support,
3992 enable -mfloat128-hardware by default. However, don't enable the
3993 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
3994 because sometimes the compiler wants to put things in an integer
3995 container, and if we don't have __int128 support, it is impossible. */
3996 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
3997 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
3998 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
3999 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4001 if (TARGET_FLOAT128_HW
4002 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4004 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4005 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4007 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4010 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4012 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4013 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4015 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4018 /* -mprefixed-addr (and hence -mpcrel) requires -mcpu=future. */
4019 if (TARGET_PREFIXED_ADDR && !TARGET_FUTURE)
4021 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4022 error ("%qs requires %qs", "-mpcrel", "-mcpu=future");
4023 else if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED_ADDR) != 0)
4024 error ("%qs requires %qs", "-mprefixed-addr", "-mcpu=future");
4026 rs6000_isa_flags &= ~(OPTION_MASK_PCREL | OPTION_MASK_PREFIXED_ADDR);
4029 /* -mpcrel requires prefixed load/store addressing. */
4030 if (TARGET_PCREL && !TARGET_PREFIXED_ADDR)
4032 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4033 error ("%qs requires %qs", "-mpcrel", "-mprefixed-addr");
4035 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4038 /* Print the options after updating the defaults. */
4039 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4040 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4042 /* E500mc does "better" if we inline more aggressively. Respect the
4043 user's opinion, though. */
4044 if (rs6000_block_move_inline_limit == 0
4045 && (rs6000_tune == PROCESSOR_PPCE500MC
4046 || rs6000_tune == PROCESSOR_PPCE500MC64
4047 || rs6000_tune == PROCESSOR_PPCE5500
4048 || rs6000_tune == PROCESSOR_PPCE6500))
4049 rs6000_block_move_inline_limit = 128;
4051 /* store_one_arg depends on expand_block_move to handle at least the
4052 size of reg_parm_stack_space. */
4053 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4054 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4056 if (global_init_p)
4058 /* If the appropriate debug option is enabled, replace the target hooks
4059 with debug versions that call the real version and then prints
4060 debugging information. */
4061 if (TARGET_DEBUG_COST)
4063 targetm.rtx_costs = rs6000_debug_rtx_costs;
4064 targetm.address_cost = rs6000_debug_address_cost;
4065 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4068 if (TARGET_DEBUG_ADDR)
4070 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4071 targetm.legitimize_address = rs6000_debug_legitimize_address;
4072 rs6000_secondary_reload_class_ptr
4073 = rs6000_debug_secondary_reload_class;
4074 targetm.secondary_memory_needed
4075 = rs6000_debug_secondary_memory_needed;
4076 targetm.can_change_mode_class
4077 = rs6000_debug_can_change_mode_class;
4078 rs6000_preferred_reload_class_ptr
4079 = rs6000_debug_preferred_reload_class;
4080 rs6000_mode_dependent_address_ptr
4081 = rs6000_debug_mode_dependent_address;
4084 if (rs6000_veclibabi_name)
4086 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4087 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4088 else
4090 error ("unknown vectorization library ABI type (%qs) for "
4091 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4092 ret = false;
4097 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4098 target attribute or pragma which automatically enables both options,
4099 unless the altivec ABI was set. This is set by default for 64-bit, but
4100 not for 32-bit. */
4101 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4103 TARGET_FLOAT128_TYPE = 0;
4104 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4105 | OPTION_MASK_FLOAT128_KEYWORD)
4106 & ~rs6000_isa_flags_explicit);
4109 /* Enable Altivec ABI for AIX -maltivec. */
4110 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4112 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4113 error ("target attribute or pragma changes AltiVec ABI");
4114 else
4115 rs6000_altivec_abi = 1;
4118 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4119 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4120 be explicitly overridden in either case. */
4121 if (TARGET_ELF)
4123 if (!global_options_set.x_rs6000_altivec_abi
4124 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4126 if (main_target_opt != NULL &&
4127 !main_target_opt->x_rs6000_altivec_abi)
4128 error ("target attribute or pragma changes AltiVec ABI");
4129 else
4130 rs6000_altivec_abi = 1;
4134 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4135 So far, the only darwin64 targets are also MACH-O. */
4136 if (TARGET_MACHO
4137 && DEFAULT_ABI == ABI_DARWIN
4138 && TARGET_64BIT)
4140 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4141 error ("target attribute or pragma changes darwin64 ABI");
4142 else
4144 rs6000_darwin64_abi = 1;
4145 /* Default to natural alignment, for better performance. */
4146 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4150 /* Place FP constants in the constant pool instead of TOC
4151 if section anchors enabled. */
4152 if (flag_section_anchors
4153 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4154 TARGET_NO_FP_IN_TOC = 1;
4156 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4157 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4159 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4160 SUBTARGET_OVERRIDE_OPTIONS;
4161 #endif
4162 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4163 SUBSUBTARGET_OVERRIDE_OPTIONS;
4164 #endif
4165 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4166 SUB3TARGET_OVERRIDE_OPTIONS;
4167 #endif
4169 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4170 after the subtarget override options are done. */
4171 if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4173 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4174 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4176 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4179 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4180 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4182 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4183 && rs6000_tune != PROCESSOR_POWER5
4184 && rs6000_tune != PROCESSOR_POWER6
4185 && rs6000_tune != PROCESSOR_POWER7
4186 && rs6000_tune != PROCESSOR_POWER8
4187 && rs6000_tune != PROCESSOR_POWER9
4188 && rs6000_tune != PROCESSOR_FUTURE
4189 && rs6000_tune != PROCESSOR_PPCA2
4190 && rs6000_tune != PROCESSOR_CELL
4191 && rs6000_tune != PROCESSOR_PPC476);
4192 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4193 || rs6000_tune == PROCESSOR_POWER5
4194 || rs6000_tune == PROCESSOR_POWER7
4195 || rs6000_tune == PROCESSOR_POWER8);
4196 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4197 || rs6000_tune == PROCESSOR_POWER5
4198 || rs6000_tune == PROCESSOR_POWER6
4199 || rs6000_tune == PROCESSOR_POWER7
4200 || rs6000_tune == PROCESSOR_POWER8
4201 || rs6000_tune == PROCESSOR_POWER9
4202 || rs6000_tune == PROCESSOR_FUTURE
4203 || rs6000_tune == PROCESSOR_PPCE500MC
4204 || rs6000_tune == PROCESSOR_PPCE500MC64
4205 || rs6000_tune == PROCESSOR_PPCE5500
4206 || rs6000_tune == PROCESSOR_PPCE6500);
4208 /* Allow debug switches to override the above settings. These are set to -1
4209 in rs6000.opt to indicate the user hasn't directly set the switch. */
4210 if (TARGET_ALWAYS_HINT >= 0)
4211 rs6000_always_hint = TARGET_ALWAYS_HINT;
4213 if (TARGET_SCHED_GROUPS >= 0)
4214 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4216 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4217 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4219 rs6000_sched_restricted_insns_priority
4220 = (rs6000_sched_groups ? 1 : 0);
4222 /* Handle -msched-costly-dep option. */
4223 rs6000_sched_costly_dep
4224 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4226 if (rs6000_sched_costly_dep_str)
4228 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4229 rs6000_sched_costly_dep = no_dep_costly;
4230 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4231 rs6000_sched_costly_dep = all_deps_costly;
4232 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4233 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4234 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4235 rs6000_sched_costly_dep = store_to_load_dep_costly;
4236 else
4237 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4238 atoi (rs6000_sched_costly_dep_str));
4241 /* Handle -minsert-sched-nops option. */
4242 rs6000_sched_insert_nops
4243 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4245 if (rs6000_sched_insert_nops_str)
4247 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4248 rs6000_sched_insert_nops = sched_finish_none;
4249 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4250 rs6000_sched_insert_nops = sched_finish_pad_groups;
4251 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4252 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4253 else
4254 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4255 atoi (rs6000_sched_insert_nops_str));
4258 /* Handle stack protector */
4259 if (!global_options_set.x_rs6000_stack_protector_guard)
4260 #ifdef TARGET_THREAD_SSP_OFFSET
4261 rs6000_stack_protector_guard = SSP_TLS;
4262 #else
4263 rs6000_stack_protector_guard = SSP_GLOBAL;
4264 #endif
4266 #ifdef TARGET_THREAD_SSP_OFFSET
4267 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4268 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4269 #endif
4271 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4273 char *endp;
4274 const char *str = rs6000_stack_protector_guard_offset_str;
4276 errno = 0;
4277 long offset = strtol (str, &endp, 0);
4278 if (!*str || *endp || errno)
4279 error ("%qs is not a valid number in %qs", str,
4280 "-mstack-protector-guard-offset=");
4282 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4283 || (TARGET_64BIT && (offset & 3)))
4284 error ("%qs is not a valid offset in %qs", str,
4285 "-mstack-protector-guard-offset=");
4287 rs6000_stack_protector_guard_offset = offset;
4290 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4292 const char *str = rs6000_stack_protector_guard_reg_str;
4293 int reg = decode_reg_name (str);
4295 if (!IN_RANGE (reg, 1, 31))
4296 error ("%qs is not a valid base register in %qs", str,
4297 "-mstack-protector-guard-reg=");
4299 rs6000_stack_protector_guard_reg = reg;
4302 if (rs6000_stack_protector_guard == SSP_TLS
4303 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4304 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4306 if (global_init_p)
4308 #ifdef TARGET_REGNAMES
4309 /* If the user desires alternate register names, copy in the
4310 alternate names now. */
4311 if (TARGET_REGNAMES)
4312 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4313 #endif
4315 /* Set aix_struct_return last, after the ABI is determined.
4316 If -maix-struct-return or -msvr4-struct-return was explicitly
4317 used, don't override with the ABI default. */
4318 if (!global_options_set.x_aix_struct_return)
4319 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4321 #if 0
4322 /* IBM XL compiler defaults to unsigned bitfields. */
4323 if (TARGET_XL_COMPAT)
4324 flag_signed_bitfields = 0;
4325 #endif
4327 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4328 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4330 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4332 /* We can only guarantee the availability of DI pseudo-ops when
4333 assembling for 64-bit targets. */
4334 if (!TARGET_64BIT)
4336 targetm.asm_out.aligned_op.di = NULL;
4337 targetm.asm_out.unaligned_op.di = NULL;
4341 /* Set branch target alignment, if not optimizing for size. */
4342 if (!optimize_size)
4344 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be
4345 aligned 8byte to avoid misprediction by the branch predictor. */
4346 if (rs6000_tune == PROCESSOR_TITAN
4347 || rs6000_tune == PROCESSOR_CELL)
4349 if (flag_align_functions && !str_align_functions)
4350 str_align_functions = "8";
4351 if (flag_align_jumps && !str_align_jumps)
4352 str_align_jumps = "8";
4353 if (flag_align_loops && !str_align_loops)
4354 str_align_loops = "8";
4356 if (rs6000_align_branch_targets)
4358 if (flag_align_functions && !str_align_functions)
4359 str_align_functions = "16";
4360 if (flag_align_jumps && !str_align_jumps)
4361 str_align_jumps = "16";
4362 if (flag_align_loops && !str_align_loops)
4364 can_override_loop_align = 1;
4365 str_align_loops = "16";
4369 if (flag_align_jumps && !str_align_jumps)
4370 str_align_jumps = "16";
4371 if (flag_align_loops && !str_align_loops)
4372 str_align_loops = "16";
4375 /* Arrange to save and restore machine status around nested functions. */
4376 init_machine_status = rs6000_init_machine_status;
4378 /* We should always be splitting complex arguments, but we can't break
4379 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4380 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4381 targetm.calls.split_complex_arg = NULL;
4383 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4384 if (DEFAULT_ABI == ABI_AIX)
4385 targetm.calls.custom_function_descriptors = 0;
4388 /* Initialize rs6000_cost with the appropriate target costs. */
4389 if (optimize_size)
4390 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4391 else
4392 switch (rs6000_tune)
4394 case PROCESSOR_RS64A:
4395 rs6000_cost = &rs64a_cost;
4396 break;
4398 case PROCESSOR_MPCCORE:
4399 rs6000_cost = &mpccore_cost;
4400 break;
4402 case PROCESSOR_PPC403:
4403 rs6000_cost = &ppc403_cost;
4404 break;
4406 case PROCESSOR_PPC405:
4407 rs6000_cost = &ppc405_cost;
4408 break;
4410 case PROCESSOR_PPC440:
4411 rs6000_cost = &ppc440_cost;
4412 break;
4414 case PROCESSOR_PPC476:
4415 rs6000_cost = &ppc476_cost;
4416 break;
4418 case PROCESSOR_PPC601:
4419 rs6000_cost = &ppc601_cost;
4420 break;
4422 case PROCESSOR_PPC603:
4423 rs6000_cost = &ppc603_cost;
4424 break;
4426 case PROCESSOR_PPC604:
4427 rs6000_cost = &ppc604_cost;
4428 break;
4430 case PROCESSOR_PPC604e:
4431 rs6000_cost = &ppc604e_cost;
4432 break;
4434 case PROCESSOR_PPC620:
4435 rs6000_cost = &ppc620_cost;
4436 break;
4438 case PROCESSOR_PPC630:
4439 rs6000_cost = &ppc630_cost;
4440 break;
4442 case PROCESSOR_CELL:
4443 rs6000_cost = &ppccell_cost;
4444 break;
4446 case PROCESSOR_PPC750:
4447 case PROCESSOR_PPC7400:
4448 rs6000_cost = &ppc750_cost;
4449 break;
4451 case PROCESSOR_PPC7450:
4452 rs6000_cost = &ppc7450_cost;
4453 break;
4455 case PROCESSOR_PPC8540:
4456 case PROCESSOR_PPC8548:
4457 rs6000_cost = &ppc8540_cost;
4458 break;
4460 case PROCESSOR_PPCE300C2:
4461 case PROCESSOR_PPCE300C3:
4462 rs6000_cost = &ppce300c2c3_cost;
4463 break;
4465 case PROCESSOR_PPCE500MC:
4466 rs6000_cost = &ppce500mc_cost;
4467 break;
4469 case PROCESSOR_PPCE500MC64:
4470 rs6000_cost = &ppce500mc64_cost;
4471 break;
4473 case PROCESSOR_PPCE5500:
4474 rs6000_cost = &ppce5500_cost;
4475 break;
4477 case PROCESSOR_PPCE6500:
4478 rs6000_cost = &ppce6500_cost;
4479 break;
4481 case PROCESSOR_TITAN:
4482 rs6000_cost = &titan_cost;
4483 break;
4485 case PROCESSOR_POWER4:
4486 case PROCESSOR_POWER5:
4487 rs6000_cost = &power4_cost;
4488 break;
4490 case PROCESSOR_POWER6:
4491 rs6000_cost = &power6_cost;
4492 break;
4494 case PROCESSOR_POWER7:
4495 rs6000_cost = &power7_cost;
4496 break;
4498 case PROCESSOR_POWER8:
4499 rs6000_cost = &power8_cost;
4500 break;
4502 case PROCESSOR_POWER9:
4503 case PROCESSOR_FUTURE:
4504 rs6000_cost = &power9_cost;
4505 break;
4507 case PROCESSOR_PPCA2:
4508 rs6000_cost = &ppca2_cost;
4509 break;
4511 default:
4512 gcc_unreachable ();
4515 if (global_init_p)
4517 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4518 rs6000_cost->simultaneous_prefetches,
4519 global_options.x_param_values,
4520 global_options_set.x_param_values);
4521 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4522 global_options.x_param_values,
4523 global_options_set.x_param_values);
4524 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4525 rs6000_cost->cache_line_size,
4526 global_options.x_param_values,
4527 global_options_set.x_param_values);
4528 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4529 global_options.x_param_values,
4530 global_options_set.x_param_values);
4532 /* Increase loop peeling limits based on performance analysis. */
4533 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4534 global_options.x_param_values,
4535 global_options_set.x_param_values);
4536 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4537 global_options.x_param_values,
4538 global_options_set.x_param_values);
4540 /* Use the 'model' -fsched-pressure algorithm by default. */
4541 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
4542 SCHED_PRESSURE_MODEL,
4543 global_options.x_param_values,
4544 global_options_set.x_param_values);
4546 /* Explicit -funroll-loops turns -munroll-only-small-loops off. */
4547 if (((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
4548 || (global_options_set.x_flag_unroll_all_loops
4549 && flag_unroll_all_loops))
4550 && !global_options_set.x_unroll_only_small_loops)
4551 unroll_only_small_loops = 0;
4553 /* If using typedef char *va_list, signal that
4554 __builtin_va_start (&ap, 0) can be optimized to
4555 ap = __builtin_next_arg (0). */
4556 if (DEFAULT_ABI != ABI_V4)
4557 targetm.expand_builtin_va_start = NULL;
4560 /* If not explicitly specified via option, decide whether to generate indexed
4561 load/store instructions. A value of -1 indicates that the
4562 initial value of this variable has not been overwritten. During
4563 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4564 if (TARGET_AVOID_XFORM == -1)
4565 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4566 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4567 need indexed accesses and the type used is the scalar type of the element
4568 being loaded or stored. */
4569 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4570 && !TARGET_ALTIVEC);
4572 /* Set the -mrecip options. */
4573 if (rs6000_recip_name)
4575 char *p = ASTRDUP (rs6000_recip_name);
4576 char *q;
4577 unsigned int mask, i;
4578 bool invert;
4580 while ((q = strtok (p, ",")) != NULL)
4582 p = NULL;
4583 if (*q == '!')
4585 invert = true;
4586 q++;
4588 else
4589 invert = false;
4591 if (!strcmp (q, "default"))
4592 mask = ((TARGET_RECIP_PRECISION)
4593 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4594 else
4596 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4597 if (!strcmp (q, recip_options[i].string))
4599 mask = recip_options[i].mask;
4600 break;
4603 if (i == ARRAY_SIZE (recip_options))
4605 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4606 invert = false;
4607 mask = 0;
4608 ret = false;
4612 if (invert)
4613 rs6000_recip_control &= ~mask;
4614 else
4615 rs6000_recip_control |= mask;
4619 /* Set the builtin mask of the various options used that could affect which
4620 builtins were used. In the past we used target_flags, but we've run out
4621 of bits, and some options are no longer in target_flags. */
4622 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4623 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4624 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4625 rs6000_builtin_mask);
4627 /* Initialize all of the registers. */
4628 rs6000_init_hard_regno_mode_ok (global_init_p);
4630 /* Save the initial options in case the user does function specific options */
4631 if (global_init_p)
4632 target_option_default_node = target_option_current_node
4633 = build_target_option_node (&global_options);
4635 /* If not explicitly specified via option, decide whether to generate the
4636 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4637 if (TARGET_LINK_STACK == -1)
4638 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4640 /* Deprecate use of -mno-speculate-indirect-jumps. */
4641 if (!rs6000_speculate_indirect_jumps)
4642 warning (0, "%qs is deprecated and not recommended in any circumstances",
4643 "-mno-speculate-indirect-jumps");
4645 return ret;
4648 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4649 define the target cpu type. */
4651 static void
4652 rs6000_option_override (void)
4654 (void) rs6000_option_override_internal (true);
4658 /* Implement targetm.vectorize.builtin_mask_for_load. */
4659 static tree
4660 rs6000_builtin_mask_for_load (void)
4662 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4663 if ((TARGET_ALTIVEC && !TARGET_VSX)
4664 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4665 return altivec_builtin_mask_for_load;
4666 else
4667 return 0;
4670 /* Implement LOOP_ALIGN. */
4671 align_flags
4672 rs6000_loop_align (rtx label)
4674 basic_block bb;
4675 int ninsns;
4677 /* Don't override loop alignment if -falign-loops was specified. */
4678 if (!can_override_loop_align)
4679 return align_loops;
4681 bb = BLOCK_FOR_INSN (label);
4682 ninsns = num_loop_insns(bb->loop_father);
4684 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4685 if (ninsns > 4 && ninsns <= 8
4686 && (rs6000_tune == PROCESSOR_POWER4
4687 || rs6000_tune == PROCESSOR_POWER5
4688 || rs6000_tune == PROCESSOR_POWER6
4689 || rs6000_tune == PROCESSOR_POWER7
4690 || rs6000_tune == PROCESSOR_POWER8))
4691 return align_flags (5);
4692 else
4693 return align_loops;
4696 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4697 after applying N number of iterations. This routine does not determine
4698 how many iterations are required to reach desired alignment. */
4700 static bool
4701 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4703 if (is_packed)
4704 return false;
4706 if (TARGET_32BIT)
4708 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4709 return true;
4711 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4712 return true;
4714 return false;
4716 else
4718 if (TARGET_MACHO)
4719 return false;
4721 /* Assuming that all other types are naturally aligned. CHECKME! */
4722 return true;
4726 /* Return true if the vector misalignment factor is supported by the
4727 target. */
4728 static bool
4729 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4730 const_tree type,
4731 int misalignment,
4732 bool is_packed)
4734 if (TARGET_VSX)
4736 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4737 return true;
4739 /* Return if movmisalign pattern is not supported for this mode. */
4740 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4741 return false;
4743 if (misalignment == -1)
4745 /* Misalignment factor is unknown at compile time but we know
4746 it's word aligned. */
4747 if (rs6000_vector_alignment_reachable (type, is_packed))
4749 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4751 if (element_size == 64 || element_size == 32)
4752 return true;
4755 return false;
4758 /* VSX supports word-aligned vector. */
4759 if (misalignment % 4 == 0)
4760 return true;
4762 return false;
4765 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4766 static int
4767 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4768 tree vectype, int misalign)
4770 unsigned elements;
4771 tree elem_type;
4773 switch (type_of_cost)
4775 case scalar_stmt:
4776 case scalar_store:
4777 case vector_stmt:
4778 case vector_store:
4779 case vec_to_scalar:
4780 case scalar_to_vec:
4781 case cond_branch_not_taken:
4782 return 1;
4783 case scalar_load:
4784 case vector_load:
4785 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4786 return 2;
4788 case vec_perm:
4789 /* Power7 has only one permute unit, make it a bit expensive. */
4790 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4791 return 3;
4792 else
4793 return 1;
4795 case vec_promote_demote:
4796 /* Power7 has only one permute/pack unit, make it a bit expensive. */
4797 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4798 return 4;
4799 else
4800 return 1;
4802 case cond_branch_taken:
4803 return 3;
4805 case unaligned_load:
4806 case vector_gather_load:
4807 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4808 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4809 return 2;
4811 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4813 elements = TYPE_VECTOR_SUBPARTS (vectype);
4814 if (elements == 2)
4815 /* Double word aligned. */
4816 return 4;
4818 if (elements == 4)
4820 switch (misalign)
4822 case 8:
4823 /* Double word aligned. */
4824 return 4;
4826 case -1:
4827 /* Unknown misalignment. */
4828 case 4:
4829 case 12:
4830 /* Word aligned. */
4831 return 33;
4833 default:
4834 gcc_unreachable ();
4839 if (TARGET_ALTIVEC)
4840 /* Misaligned loads are not supported. */
4841 gcc_unreachable ();
4843 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4844 return 4;
4846 case unaligned_store:
4847 case vector_scatter_store:
4848 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4849 return 1;
4851 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4853 elements = TYPE_VECTOR_SUBPARTS (vectype);
4854 if (elements == 2)
4855 /* Double word aligned. */
4856 return 2;
4858 if (elements == 4)
4860 switch (misalign)
4862 case 8:
4863 /* Double word aligned. */
4864 return 2;
4866 case -1:
4867 /* Unknown misalignment. */
4868 case 4:
4869 case 12:
4870 /* Word aligned. */
4871 return 23;
4873 default:
4874 gcc_unreachable ();
4879 if (TARGET_ALTIVEC)
4880 /* Misaligned stores are not supported. */
4881 gcc_unreachable ();
4883 return 2;
4885 case vec_construct:
4886 /* This is a rough approximation assuming non-constant elements
4887 constructed into a vector via element insertion. FIXME:
4888 vec_construct is not granular enough for uniformly good
4889 decisions. If the initialization is a splat, this is
4890 cheaper than we estimate. Improve this someday. */
4891 elem_type = TREE_TYPE (vectype);
4892 /* 32-bit vectors loaded into registers are stored as double
4893 precision, so we need 2 permutes, 2 converts, and 1 merge
4894 to construct a vector of short floats from them. */
4895 if (SCALAR_FLOAT_TYPE_P (elem_type)
4896 && TYPE_PRECISION (elem_type) == 32)
4897 return 5;
4898 /* On POWER9, integer vector types are built up in GPRs and then
4899 use a direct move (2 cycles). For POWER8 this is even worse,
4900 as we need two direct moves and a merge, and the direct moves
4901 are five cycles. */
4902 else if (INTEGRAL_TYPE_P (elem_type))
4904 if (TARGET_P9_VECTOR)
4905 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
4906 else
4907 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
4909 else
4910 /* V2DFmode doesn't need a direct move. */
4911 return 2;
4913 default:
4914 gcc_unreachable ();
4918 /* Implement targetm.vectorize.preferred_simd_mode. */
4920 static machine_mode
4921 rs6000_preferred_simd_mode (scalar_mode mode)
4923 if (TARGET_VSX)
4924 switch (mode)
4926 case E_DFmode:
4927 return V2DFmode;
4928 default:;
4930 if (TARGET_ALTIVEC || TARGET_VSX)
4931 switch (mode)
4933 case E_SFmode:
4934 return V4SFmode;
4935 case E_TImode:
4936 return V1TImode;
4937 case E_DImode:
4938 return V2DImode;
4939 case E_SImode:
4940 return V4SImode;
4941 case E_HImode:
4942 return V8HImode;
4943 case E_QImode:
4944 return V16QImode;
4945 default:;
4947 return word_mode;
/* Bookkeeping shared by the vectorizer cost hooks below.  */
typedef struct _rs6000_cost_data
{
  /* Loop being costed; may be NULL (checked by rs6000_finish_cost).  */
  struct loop *loop_info;
  /* Accumulated costs, indexed by vect_cost_model_location
     (prologue, body, epilogue).  */
  unsigned cost[3];
} rs6000_cost_data;
4956 /* Test for likely overcommitment of vector hardware resources. If a
4957 loop iteration is relatively large, and too large a percentage of
4958 instructions in the loop are vectorized, the cost model may not
4959 adequately reflect delays from unavailable vector resources.
4960 Penalize the loop body cost for this case. */
4962 static void
4963 rs6000_density_test (rs6000_cost_data *data)
4965 const int DENSITY_PCT_THRESHOLD = 85;
4966 const int DENSITY_SIZE_THRESHOLD = 70;
4967 const int DENSITY_PENALTY = 10;
4968 struct loop *loop = data->loop_info;
4969 basic_block *bbs = get_loop_body (loop);
4970 int nbbs = loop->num_nodes;
4971 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
4972 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4973 int i, density_pct;
4975 for (i = 0; i < nbbs; i++)
4977 basic_block bb = bbs[i];
4978 gimple_stmt_iterator gsi;
4980 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4982 gimple *stmt = gsi_stmt (gsi);
4983 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
4985 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4986 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4987 not_vec_cost++;
4991 free (bbs);
4992 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4994 if (density_pct > DENSITY_PCT_THRESHOLD
4995 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4997 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4998 if (dump_enabled_p ())
4999 dump_printf_loc (MSG_NOTE, vect_location,
5000 "density %d%%, cost %d exceeds threshold, penalizing "
5001 "loop body cost by %d%%", density_pct,
5002 vec_cost + not_vec_cost, DENSITY_PENALTY);
5006 /* Implement targetm.vectorize.init_cost. */
5008 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5009 instruction is needed by the vectorization. */
5010 static bool rs6000_vect_nonmem;
5012 static void *
5013 rs6000_init_cost (struct loop *loop_info)
5015 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5016 data->loop_info = loop_info;
5017 data->cost[vect_prologue] = 0;
5018 data->cost[vect_body] = 0;
5019 data->cost[vect_epilogue] = 0;
5020 rs6000_vect_nonmem = false;
5021 return data;
5024 /* Implement targetm.vectorize.add_stmt_cost. */
5026 static unsigned
5027 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5028 struct _stmt_vec_info *stmt_info, int misalign,
5029 enum vect_cost_model_location where)
5031 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5032 unsigned retval = 0;
5034 if (flag_vect_cost_model)
5036 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5037 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5038 misalign);
5039 /* Statements in an inner loop relative to the loop being
5040 vectorized are weighted more heavily. The value here is
5041 arbitrary and could potentially be improved with analysis. */
5042 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5043 count *= 50; /* FIXME. */
5045 retval = (unsigned) (count * stmt_cost);
5046 cost_data->cost[where] += retval;
5048 /* Check whether we're doing something other than just a copy loop.
5049 Not all such loops may be profitably vectorized; see
5050 rs6000_finish_cost. */
5051 if ((kind == vec_to_scalar || kind == vec_perm
5052 || kind == vec_promote_demote || kind == vec_construct
5053 || kind == scalar_to_vec)
5054 || (where == vect_body && kind == vector_stmt))
5055 rs6000_vect_nonmem = true;
5058 return retval;
5061 /* Implement targetm.vectorize.finish_cost. */
5063 static void
5064 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5065 unsigned *body_cost, unsigned *epilogue_cost)
5067 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5069 if (cost_data->loop_info)
5070 rs6000_density_test (cost_data);
5072 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5073 that require versioning for any reason. The vectorization is at
5074 best a wash inside the loop, and the versioning checks make
5075 profitability highly unlikely and potentially quite harmful. */
5076 if (cost_data->loop_info)
5078 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5079 if (!rs6000_vect_nonmem
5080 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5081 && LOOP_REQUIRES_VERSIONING (vec_info))
5082 cost_data->cost[vect_body] += 10000;
5085 *prologue_cost = cost_data->cost[vect_prologue];
5086 *body_cost = cost_data->cost[vect_body];
5087 *epilogue_cost = cost_data->cost[vect_epilogue];
/* Implement targetm.vectorize.destroy_cost_data.
   Release the accumulator allocated by rs6000_init_cost.  */

static void
rs6000_destroy_cost_data (void *data)
{
  free (data);
}
5098 /* Implement targetm.loop_unroll_adjust. */
5100 static unsigned
5101 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5103 if (unroll_only_small_loops)
5105 /* TODO: This is hardcoded to 10 right now. It can be refined, for
5106 example we may want to unroll very small loops more times (4 perhaps).
5107 We also should use a PARAM for this. */
5108 if (loop->ninsns <= 10)
5109 return MIN (2, nunroll);
5110 else
5111 return 0;
5114 return nunroll;
5117 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5118 library with vectorized intrinsics. */
5120 static tree
5121 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5122 tree type_in)
5124 char name[32];
5125 const char *suffix = NULL;
5126 tree fntype, new_fndecl, bdecl = NULL_TREE;
5127 int n_args = 1;
5128 const char *bname;
5129 machine_mode el_mode, in_mode;
5130 int n, in_n;
5132 /* Libmass is suitable for unsafe math only as it does not correctly support
5133 parts of IEEE with the required precision such as denormals. Only support
5134 it if we have VSX to use the simd d2 or f4 functions.
5135 XXX: Add variable length support. */
5136 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5137 return NULL_TREE;
5139 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5140 n = TYPE_VECTOR_SUBPARTS (type_out);
5141 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5142 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5143 if (el_mode != in_mode
5144 || n != in_n)
5145 return NULL_TREE;
5147 switch (fn)
5149 CASE_CFN_ATAN2:
5150 CASE_CFN_HYPOT:
5151 CASE_CFN_POW:
5152 n_args = 2;
5153 gcc_fallthrough ();
5155 CASE_CFN_ACOS:
5156 CASE_CFN_ACOSH:
5157 CASE_CFN_ASIN:
5158 CASE_CFN_ASINH:
5159 CASE_CFN_ATAN:
5160 CASE_CFN_ATANH:
5161 CASE_CFN_CBRT:
5162 CASE_CFN_COS:
5163 CASE_CFN_COSH:
5164 CASE_CFN_ERF:
5165 CASE_CFN_ERFC:
5166 CASE_CFN_EXP2:
5167 CASE_CFN_EXP:
5168 CASE_CFN_EXPM1:
5169 CASE_CFN_LGAMMA:
5170 CASE_CFN_LOG10:
5171 CASE_CFN_LOG1P:
5172 CASE_CFN_LOG2:
5173 CASE_CFN_LOG:
5174 CASE_CFN_SIN:
5175 CASE_CFN_SINH:
5176 CASE_CFN_SQRT:
5177 CASE_CFN_TAN:
5178 CASE_CFN_TANH:
5179 if (el_mode == DFmode && n == 2)
5181 bdecl = mathfn_built_in (double_type_node, fn);
5182 suffix = "d2"; /* pow -> powd2 */
5184 else if (el_mode == SFmode && n == 4)
5186 bdecl = mathfn_built_in (float_type_node, fn);
5187 suffix = "4"; /* powf -> powf4 */
5189 else
5190 return NULL_TREE;
5191 if (!bdecl)
5192 return NULL_TREE;
5193 break;
5195 default:
5196 return NULL_TREE;
5199 gcc_assert (suffix != NULL);
5200 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5201 if (!bname)
5202 return NULL_TREE;
5204 strcpy (name, bname + sizeof ("__builtin_") - 1);
5205 strcat (name, suffix);
5207 if (n_args == 1)
5208 fntype = build_function_type_list (type_out, type_in, NULL);
5209 else if (n_args == 2)
5210 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5211 else
5212 gcc_unreachable ();
5214 /* Build a function declaration for the vectorized function. */
5215 new_fndecl = build_decl (BUILTINS_LOCATION,
5216 FUNCTION_DECL, get_identifier (name), fntype);
5217 TREE_PUBLIC (new_fndecl) = 1;
5218 DECL_EXTERNAL (new_fndecl) = 1;
5219 DECL_IS_NOVOPS (new_fndecl) = 1;
5220 TREE_READONLY (new_fndecl) = 1;
5222 return new_fndecl;
5225 /* Returns a function decl for a vectorized version of the builtin function
5226 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5227 if it is not available. */
5229 static tree
5230 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5231 tree type_in)
5233 machine_mode in_mode, out_mode;
5234 int in_n, out_n;
5236 if (TARGET_DEBUG_BUILTIN)
5237 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5238 combined_fn_name (combined_fn (fn)),
5239 GET_MODE_NAME (TYPE_MODE (type_out)),
5240 GET_MODE_NAME (TYPE_MODE (type_in)));
5242 if (TREE_CODE (type_out) != VECTOR_TYPE
5243 || TREE_CODE (type_in) != VECTOR_TYPE)
5244 return NULL_TREE;
5246 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5247 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5248 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5249 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5251 switch (fn)
5253 CASE_CFN_COPYSIGN:
5254 if (VECTOR_UNIT_VSX_P (V2DFmode)
5255 && out_mode == DFmode && out_n == 2
5256 && in_mode == DFmode && in_n == 2)
5257 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5258 if (VECTOR_UNIT_VSX_P (V4SFmode)
5259 && out_mode == SFmode && out_n == 4
5260 && in_mode == SFmode && in_n == 4)
5261 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5262 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5263 && out_mode == SFmode && out_n == 4
5264 && in_mode == SFmode && in_n == 4)
5265 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5266 break;
5267 CASE_CFN_CEIL:
5268 if (VECTOR_UNIT_VSX_P (V2DFmode)
5269 && out_mode == DFmode && out_n == 2
5270 && in_mode == DFmode && in_n == 2)
5271 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5272 if (VECTOR_UNIT_VSX_P (V4SFmode)
5273 && out_mode == SFmode && out_n == 4
5274 && in_mode == SFmode && in_n == 4)
5275 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5276 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5277 && out_mode == SFmode && out_n == 4
5278 && in_mode == SFmode && in_n == 4)
5279 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5280 break;
5281 CASE_CFN_FLOOR:
5282 if (VECTOR_UNIT_VSX_P (V2DFmode)
5283 && out_mode == DFmode && out_n == 2
5284 && in_mode == DFmode && in_n == 2)
5285 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5286 if (VECTOR_UNIT_VSX_P (V4SFmode)
5287 && out_mode == SFmode && out_n == 4
5288 && in_mode == SFmode && in_n == 4)
5289 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5290 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5291 && out_mode == SFmode && out_n == 4
5292 && in_mode == SFmode && in_n == 4)
5293 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5294 break;
5295 CASE_CFN_FMA:
5296 if (VECTOR_UNIT_VSX_P (V2DFmode)
5297 && out_mode == DFmode && out_n == 2
5298 && in_mode == DFmode && in_n == 2)
5299 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5300 if (VECTOR_UNIT_VSX_P (V4SFmode)
5301 && out_mode == SFmode && out_n == 4
5302 && in_mode == SFmode && in_n == 4)
5303 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5304 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5305 && out_mode == SFmode && out_n == 4
5306 && in_mode == SFmode && in_n == 4)
5307 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5308 break;
5309 CASE_CFN_TRUNC:
5310 if (VECTOR_UNIT_VSX_P (V2DFmode)
5311 && out_mode == DFmode && out_n == 2
5312 && in_mode == DFmode && in_n == 2)
5313 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5314 if (VECTOR_UNIT_VSX_P (V4SFmode)
5315 && out_mode == SFmode && out_n == 4
5316 && in_mode == SFmode && in_n == 4)
5317 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5318 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5319 && out_mode == SFmode && out_n == 4
5320 && in_mode == SFmode && in_n == 4)
5321 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5322 break;
5323 CASE_CFN_NEARBYINT:
5324 if (VECTOR_UNIT_VSX_P (V2DFmode)
5325 && flag_unsafe_math_optimizations
5326 && out_mode == DFmode && out_n == 2
5327 && in_mode == DFmode && in_n == 2)
5328 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5329 if (VECTOR_UNIT_VSX_P (V4SFmode)
5330 && flag_unsafe_math_optimizations
5331 && out_mode == SFmode && out_n == 4
5332 && in_mode == SFmode && in_n == 4)
5333 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5334 break;
5335 CASE_CFN_RINT:
5336 if (VECTOR_UNIT_VSX_P (V2DFmode)
5337 && !flag_trapping_math
5338 && out_mode == DFmode && out_n == 2
5339 && in_mode == DFmode && in_n == 2)
5340 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5341 if (VECTOR_UNIT_VSX_P (V4SFmode)
5342 && !flag_trapping_math
5343 && out_mode == SFmode && out_n == 4
5344 && in_mode == SFmode && in_n == 4)
5345 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5346 break;
5347 default:
5348 break;
5351 /* Generate calls to libmass if appropriate. */
5352 if (rs6000_veclib_handler)
5353 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5355 return NULL_TREE;
5358 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5360 static tree
5361 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5362 tree type_in)
5364 machine_mode in_mode, out_mode;
5365 int in_n, out_n;
5367 if (TARGET_DEBUG_BUILTIN)
5368 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5369 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5370 GET_MODE_NAME (TYPE_MODE (type_out)),
5371 GET_MODE_NAME (TYPE_MODE (type_in)));
5373 if (TREE_CODE (type_out) != VECTOR_TYPE
5374 || TREE_CODE (type_in) != VECTOR_TYPE)
5375 return NULL_TREE;
5377 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5378 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5379 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5380 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5382 enum rs6000_builtins fn
5383 = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
5384 switch (fn)
5386 case RS6000_BUILTIN_RSQRTF:
5387 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5388 && out_mode == SFmode && out_n == 4
5389 && in_mode == SFmode && in_n == 4)
5390 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5391 break;
5392 case RS6000_BUILTIN_RSQRT:
5393 if (VECTOR_UNIT_VSX_P (V2DFmode)
5394 && out_mode == DFmode && out_n == 2
5395 && in_mode == DFmode && in_n == 2)
5396 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5397 break;
5398 case RS6000_BUILTIN_RECIPF:
5399 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5400 && out_mode == SFmode && out_n == 4
5401 && in_mode == SFmode && in_n == 4)
5402 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5403 break;
5404 case RS6000_BUILTIN_RECIP:
5405 if (VECTOR_UNIT_VSX_P (V2DFmode)
5406 && out_mode == DFmode && out_n == 2
5407 && in_mode == DFmode && in_n == 2)
5408 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5409 break;
5410 default:
5411 break;
5413 return NULL_TREE;
/* Default CPU string used by the rs6000*_file_start functions; set from
   TARGET_CPU_DEFAULT in rs6000_file_start.  */
static const char *rs6000_default_cpu;
5419 #ifdef USING_ELFOS_H
5420 const char *rs6000_machine;
5422 const char *
5423 rs6000_machine_from_flags (void)
5425 HOST_WIDE_INT flags = rs6000_isa_flags;
5427 /* Disable the flags that should never influence the .machine selection. */
5428 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT);
5430 if ((flags & (ISA_FUTURE_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5431 return "future";
5432 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5433 return "power9";
5434 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5435 return "power8";
5436 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5437 return "power7";
5438 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5439 return "power6";
5440 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5441 return "power5";
5442 if ((flags & ISA_2_1_MASKS) != 0)
5443 return "power4";
5444 if ((flags & OPTION_MASK_POWERPC64) != 0)
5445 return "ppc64";
5446 return "ppc";
5449 void
5450 emit_asm_machine (void)
5452 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5454 #endif
5456 /* Do anything needed at the start of the asm file. */
5458 static void
5459 rs6000_file_start (void)
5461 char buffer[80];
5462 const char *start = buffer;
5463 FILE *file = asm_out_file;
5465 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5467 default_file_start ();
5469 if (flag_verbose_asm)
5471 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5473 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5475 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5476 start = "";
5479 if (global_options_set.x_rs6000_cpu_index)
5481 fprintf (file, "%s -mcpu=%s", start,
5482 processor_target_table[rs6000_cpu_index].name);
5483 start = "";
5486 if (global_options_set.x_rs6000_tune_index)
5488 fprintf (file, "%s -mtune=%s", start,
5489 processor_target_table[rs6000_tune_index].name);
5490 start = "";
5493 if (PPC405_ERRATUM77)
5495 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5496 start = "";
5499 #ifdef USING_ELFOS_H
5500 switch (rs6000_sdata)
5502 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5503 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5504 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5505 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5508 if (rs6000_sdata && g_switch_value)
5510 fprintf (file, "%s -G %d", start,
5511 g_switch_value);
5512 start = "";
5514 #endif
5516 if (*start == '\0')
5517 putc ('\n', file);
5520 #ifdef USING_ELFOS_H
5521 rs6000_machine = rs6000_machine_from_flags ();
5522 emit_asm_machine ();
5523 #endif
5525 if (DEFAULT_ABI == ABI_ELFv2)
5526 fprintf (file, "\t.abiversion 2\n");
5530 /* Return nonzero if this function is known to have a null epilogue. */
5533 direct_return (void)
5535 if (reload_completed)
5537 rs6000_stack_t *info = rs6000_stack_info ();
5539 if (info->first_gp_reg_save == 32
5540 && info->first_fp_reg_save == 64
5541 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5542 && ! info->lr_save_p
5543 && ! info->cr_save_p
5544 && info->vrsave_size == 0
5545 && ! info->push_p)
5546 return 1;
5549 return 0;
5552 /* Helper for num_insns_constant. Calculate number of instructions to
5553 load VALUE to a single gpr using combinations of addi, addis, ori,
5554 oris and sldi instructions. */
5556 static int
5557 num_insns_constant_gpr (HOST_WIDE_INT value)
5559 /* signed constant loadable with addi */
5560 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5561 return 1;
5563 /* constant loadable with addis */
5564 else if ((value & 0xffff) == 0
5565 && (value >> 31 == -1 || value >> 31 == 0))
5566 return 1;
5568 else if (TARGET_POWERPC64)
5570 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5571 HOST_WIDE_INT high = value >> 31;
5573 if (high == 0 || high == -1)
5574 return 2;
5576 high >>= 1;
5578 if (low == 0)
5579 return num_insns_constant_gpr (high) + 1;
5580 else if (high == 0)
5581 return num_insns_constant_gpr (low) + 1;
5582 else
5583 return (num_insns_constant_gpr (high)
5584 + num_insns_constant_gpr (low) + 1);
5587 else
5588 return 2;
5591 /* Helper for num_insns_constant. Allow constants formed by the
5592 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5593 and handle modes that require multiple gprs. */
5595 static int
5596 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5598 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5599 int total = 0;
5600 while (nregs-- > 0)
5602 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5603 int insns = num_insns_constant_gpr (low);
5604 if (insns > 2
5605 /* We won't get more than 2 from num_insns_constant_gpr
5606 except when TARGET_POWERPC64 and mode is DImode or
5607 wider, so the register mode must be DImode. */
5608 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5609 insns = 2;
5610 total += insns;
5611 value >>= BITS_PER_WORD;
5613 return total;
5616 /* Return the number of instructions it takes to form a constant in as
5617 many gprs are needed for MODE. */
5620 num_insns_constant (rtx op, machine_mode mode)
5622 HOST_WIDE_INT val;
5624 switch (GET_CODE (op))
5626 case CONST_INT:
5627 val = INTVAL (op);
5628 break;
5630 case CONST_WIDE_INT:
5632 int insns = 0;
5633 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5634 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5635 DImode);
5636 return insns;
5639 case CONST_DOUBLE:
5641 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
5643 if (mode == SFmode || mode == SDmode)
5645 long l;
5647 if (mode == SDmode)
5648 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5649 else
5650 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5651 /* See the first define_split in rs6000.md handling a
5652 const_double_operand. */
5653 val = l;
5654 mode = SImode;
5656 else if (mode == DFmode || mode == DDmode)
5658 long l[2];
5660 if (mode == DDmode)
5661 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5662 else
5663 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5665 /* See the second (32-bit) and third (64-bit) define_split
5666 in rs6000.md handling a const_double_operand. */
5667 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5668 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5669 mode = DImode;
5671 else if (mode == TFmode || mode == TDmode
5672 || mode == KFmode || mode == IFmode)
5674 long l[4];
5675 int insns;
5677 if (mode == TDmode)
5678 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5679 else
5680 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
5682 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5683 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5684 insns = num_insns_constant_multi (val, DImode);
5685 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5686 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5687 insns += num_insns_constant_multi (val, DImode);
5688 return insns;
5690 else
5691 gcc_unreachable ();
5693 break;
5695 default:
5696 gcc_unreachable ();
5699 return num_insns_constant_multi (val, mode);
5702 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5703 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5704 corresponding element of the vector, but for V4SFmode, the
5705 corresponding "float" is interpreted as an SImode integer. */
5707 HOST_WIDE_INT
5708 const_vector_elt_as_int (rtx op, unsigned int elt)
5710 rtx tmp;
5712 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5713 gcc_assert (GET_MODE (op) != V2DImode
5714 && GET_MODE (op) != V2DFmode);
5716 tmp = CONST_VECTOR_ELT (op, elt);
5717 if (GET_MODE (op) == V4SFmode)
5718 tmp = gen_lowpart (SImode, tmp);
5719 return INTVAL (tmp);
5722 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5723 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5724 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5725 all items are set to the same value and contain COPIES replicas of the
5726 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5727 operand and the others are set to the value of the operand's msb. */
5729 static bool
5730 vspltis_constant (rtx op, unsigned step, unsigned copies)
5732 machine_mode mode = GET_MODE (op);
5733 machine_mode inner = GET_MODE_INNER (mode);
5735 unsigned i;
5736 unsigned nunits;
5737 unsigned bitsize;
5738 unsigned mask;
5740 HOST_WIDE_INT val;
5741 HOST_WIDE_INT splat_val;
5742 HOST_WIDE_INT msb_val;
5744 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5745 return false;
5747 nunits = GET_MODE_NUNITS (mode);
5748 bitsize = GET_MODE_BITSIZE (inner);
5749 mask = GET_MODE_MASK (inner);
5751 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5752 splat_val = val;
5753 msb_val = val >= 0 ? 0 : -1;
5755 /* Construct the value to be splatted, if possible. If not, return 0. */
5756 for (i = 2; i <= copies; i *= 2)
5758 HOST_WIDE_INT small_val;
5759 bitsize /= 2;
5760 small_val = splat_val >> bitsize;
5761 mask >>= bitsize;
5762 if (splat_val != ((HOST_WIDE_INT)
5763 ((unsigned HOST_WIDE_INT) small_val << bitsize)
5764 | (small_val & mask)))
5765 return false;
5766 splat_val = small_val;
5769 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5770 if (EASY_VECTOR_15 (splat_val))
5773 /* Also check if we can splat, and then add the result to itself. Do so if
5774 the value is positive, of if the splat instruction is using OP's mode;
5775 for splat_val < 0, the splat and the add should use the same mode. */
5776 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5777 && (splat_val >= 0 || (step == 1 && copies == 1)))
5780 /* Also check if are loading up the most significant bit which can be done by
5781 loading up -1 and shifting the value left by -1. */
5782 else if (EASY_VECTOR_MSB (splat_val, inner))
5785 else
5786 return false;
5788 /* Check if VAL is present in every STEP-th element, and the
5789 other elements are filled with its most significant bit. */
5790 for (i = 1; i < nunits; ++i)
5792 HOST_WIDE_INT desired_val;
5793 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5794 if ((i & (step - 1)) == 0)
5795 desired_val = val;
5796 else
5797 desired_val = msb_val;
5799 if (desired_val != const_vector_elt_as_int (op, elt))
5800 return false;
5803 return true;
5806 /* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI
5807 instruction, filling in the bottom elements with 0 or -1.
5809 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5810 for the number of zeroes to shift in, or negative for the number of 0xff
5811 bytes to shift in.
5813 OP is a CONST_VECTOR. */
5816 vspltis_shifted (rtx op)
5818 machine_mode mode = GET_MODE (op);
5819 machine_mode inner = GET_MODE_INNER (mode);
5821 unsigned i, j;
5822 unsigned nunits;
5823 unsigned mask;
5825 HOST_WIDE_INT val;
5827 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
5828 return false;
5830 /* We need to create pseudo registers to do the shift, so don't recognize
5831 shift vector constants after reload. */
5832 if (!can_create_pseudo_p ())
5833 return false;
5835 nunits = GET_MODE_NUNITS (mode);
5836 mask = GET_MODE_MASK (inner);
5838 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
5840 /* Check if the value can really be the operand of a vspltis[bhw]. */
5841 if (EASY_VECTOR_15 (val))
5844 /* Also check if we are loading up the most significant bit which can be done
5845 by loading up -1 and shifting the value left by -1. */
5846 else if (EASY_VECTOR_MSB (val, inner))
5849 else
5850 return 0;
5852 /* Check if VAL is present in every STEP-th element until we find elements
5853 that are 0 or all 1 bits. */
5854 for (i = 1; i < nunits; ++i)
5856 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
5857 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
5859 /* If the value isn't the splat value, check for the remaining elements
5860 being 0/-1. */
5861 if (val != elt_val)
5863 if (elt_val == 0)
5865 for (j = i+1; j < nunits; ++j)
5867 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5868 if (const_vector_elt_as_int (op, elt2) != 0)
5869 return 0;
5872 return (nunits - i) * GET_MODE_SIZE (inner);
5875 else if ((elt_val & mask) == mask)
5877 for (j = i+1; j < nunits; ++j)
5879 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5880 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
5881 return 0;
5884 return -((nunits - i) * GET_MODE_SIZE (inner));
5887 else
5888 return 0;
5892 /* If all elements are equal, we don't need to do VLSDOI. */
5893 return 0;
5897 /* Return true if OP is of the given MODE and can be synthesized
5898 with a vspltisb, vspltish or vspltisw. */
5900 bool
5901 easy_altivec_constant (rtx op, machine_mode mode)
5903 unsigned step, copies;
5905 if (mode == VOIDmode)
5906 mode = GET_MODE (op);
5907 else if (mode != GET_MODE (op))
5908 return false;
5910 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5911 constants. */
5912 if (mode == V2DFmode)
5913 return zero_constant (op, mode);
5915 else if (mode == V2DImode)
5917 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
5918 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
5919 return false;
5921 if (zero_constant (op, mode))
5922 return true;
5924 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5925 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5926 return true;
5928 return false;
5931 /* V1TImode is a special container for TImode. Ignore for now. */
5932 else if (mode == V1TImode)
5933 return false;
5935 /* Start with a vspltisw. */
5936 step = GET_MODE_NUNITS (mode) / 4;
5937 copies = 1;
5939 if (vspltis_constant (op, step, copies))
5940 return true;
5942 /* Then try with a vspltish. */
5943 if (step == 1)
5944 copies <<= 1;
5945 else
5946 step >>= 1;
5948 if (vspltis_constant (op, step, copies))
5949 return true;
5951 /* And finally a vspltisb. */
5952 if (step == 1)
5953 copies <<= 1;
5954 else
5955 step >>= 1;
5957 if (vspltis_constant (op, step, copies))
5958 return true;
5960 if (vspltis_shifted (op) != 0)
5961 return true;
5963 return false;
5966 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5967 result is OP. Abort if it is not possible. */
5970 gen_easy_altivec_constant (rtx op)
5972 machine_mode mode = GET_MODE (op);
5973 int nunits = GET_MODE_NUNITS (mode);
5974 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5975 unsigned step = nunits / 4;
5976 unsigned copies = 1;
5978 /* Start with a vspltisw. */
5979 if (vspltis_constant (op, step, copies))
5980 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
5982 /* Then try with a vspltish. */
5983 if (step == 1)
5984 copies <<= 1;
5985 else
5986 step >>= 1;
5988 if (vspltis_constant (op, step, copies))
5989 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
5991 /* And finally a vspltisb. */
5992 if (step == 1)
5993 copies <<= 1;
5994 else
5995 step >>= 1;
5997 if (vspltis_constant (op, step, copies))
5998 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6000 gcc_unreachable ();
6003 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6004 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6006 Return the number of instructions needed (1 or 2) into the address pointed
6007 via NUM_INSNS_PTR.
6009 Return the constant that is being split via CONSTANT_PTR. */
6011 bool
6012 xxspltib_constant_p (rtx op,
6013 machine_mode mode,
6014 int *num_insns_ptr,
6015 int *constant_ptr)
6017 size_t nunits = GET_MODE_NUNITS (mode);
6018 size_t i;
6019 HOST_WIDE_INT value;
6020 rtx element;
6022 /* Set the returned values to out of bound values. */
6023 *num_insns_ptr = -1;
6024 *constant_ptr = 256;
6026 if (!TARGET_P9_VECTOR)
6027 return false;
6029 if (mode == VOIDmode)
6030 mode = GET_MODE (op);
6032 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6033 return false;
6035 /* Handle (vec_duplicate <constant>). */
6036 if (GET_CODE (op) == VEC_DUPLICATE)
6038 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6039 && mode != V2DImode)
6040 return false;
6042 element = XEXP (op, 0);
6043 if (!CONST_INT_P (element))
6044 return false;
6046 value = INTVAL (element);
6047 if (!IN_RANGE (value, -128, 127))
6048 return false;
6051 /* Handle (const_vector [...]). */
6052 else if (GET_CODE (op) == CONST_VECTOR)
6054 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6055 && mode != V2DImode)
6056 return false;
6058 element = CONST_VECTOR_ELT (op, 0);
6059 if (!CONST_INT_P (element))
6060 return false;
6062 value = INTVAL (element);
6063 if (!IN_RANGE (value, -128, 127))
6064 return false;
6066 for (i = 1; i < nunits; i++)
6068 element = CONST_VECTOR_ELT (op, i);
6069 if (!CONST_INT_P (element))
6070 return false;
6072 if (value != INTVAL (element))
6073 return false;
6077 /* Handle integer constants being loaded into the upper part of the VSX
6078 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6079 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLITIB. */
6080 else if (CONST_INT_P (op))
6082 if (!SCALAR_INT_MODE_P (mode))
6083 return false;
6085 value = INTVAL (op);
6086 if (!IN_RANGE (value, -128, 127))
6087 return false;
6089 if (!IN_RANGE (value, -1, 0))
6091 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6092 return false;
6094 if (EASY_VECTOR_15 (value))
6095 return false;
6099 else
6100 return false;
6102 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6103 sign extend. Special case 0/-1 to allow getting any VSX register instead
6104 of an Altivec register. */
6105 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6106 && EASY_VECTOR_15 (value))
6107 return false;
6109 /* Return # of instructions and the constant byte for XXSPLTIB. */
6110 if (mode == V16QImode)
6111 *num_insns_ptr = 1;
6113 else if (IN_RANGE (value, -1, 0))
6114 *num_insns_ptr = 1;
6116 else
6117 *num_insns_ptr = 2;
6119 *constant_ptr = (int) value;
6120 return true;
6123 const char *
6124 output_vec_const_move (rtx *operands)
6126 int shift;
6127 machine_mode mode;
6128 rtx dest, vec;
6130 dest = operands[0];
6131 vec = operands[1];
6132 mode = GET_MODE (dest);
6134 if (TARGET_VSX)
6136 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6137 int xxspltib_value = 256;
6138 int num_insns = -1;
6140 if (zero_constant (vec, mode))
6142 if (TARGET_P9_VECTOR)
6143 return "xxspltib %x0,0";
6145 else if (dest_vmx_p)
6146 return "vspltisw %0,0";
6148 else
6149 return "xxlxor %x0,%x0,%x0";
6152 if (all_ones_constant (vec, mode))
6154 if (TARGET_P9_VECTOR)
6155 return "xxspltib %x0,255";
6157 else if (dest_vmx_p)
6158 return "vspltisw %0,-1";
6160 else if (TARGET_P8_VECTOR)
6161 return "xxlorc %x0,%x0,%x0";
6163 else
6164 gcc_unreachable ();
6167 if (TARGET_P9_VECTOR
6168 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6170 if (num_insns == 1)
6172 operands[2] = GEN_INT (xxspltib_value & 0xff);
6173 return "xxspltib %x0,%2";
6176 return "#";
6180 if (TARGET_ALTIVEC)
6182 rtx splat_vec;
6184 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6185 if (zero_constant (vec, mode))
6186 return "vspltisw %0,0";
6188 if (all_ones_constant (vec, mode))
6189 return "vspltisw %0,-1";
6191 /* Do we need to construct a value using VSLDOI? */
6192 shift = vspltis_shifted (vec);
6193 if (shift != 0)
6194 return "#";
6196 splat_vec = gen_easy_altivec_constant (vec);
6197 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6198 operands[1] = XEXP (splat_vec, 0);
6199 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6200 return "#";
6202 switch (GET_MODE (splat_vec))
6204 case E_V4SImode:
6205 return "vspltisw %0,%1";
6207 case E_V8HImode:
6208 return "vspltish %0,%1";
6210 case E_V16QImode:
6211 return "vspltisb %0,%1";
6213 default:
6214 gcc_unreachable ();
6218 gcc_unreachable ();
6221 /* Initialize vector TARGET to VALS. */
6223 void
6224 rs6000_expand_vector_init (rtx target, rtx vals)
6226 machine_mode mode = GET_MODE (target);
6227 machine_mode inner_mode = GET_MODE_INNER (mode);
6228 int n_elts = GET_MODE_NUNITS (mode);
6229 int n_var = 0, one_var = -1;
6230 bool all_same = true, all_const_zero = true;
6231 rtx x, mem;
6232 int i;
6234 for (i = 0; i < n_elts; ++i)
6236 x = XVECEXP (vals, 0, i);
6237 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6238 ++n_var, one_var = i;
6239 else if (x != CONST0_RTX (inner_mode))
6240 all_const_zero = false;
6242 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6243 all_same = false;
6246 if (n_var == 0)
6248 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6249 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6250 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6252 /* Zero register. */
6253 emit_move_insn (target, CONST0_RTX (mode));
6254 return;
6256 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6258 /* Splat immediate. */
6259 emit_insn (gen_rtx_SET (target, const_vec));
6260 return;
6262 else
6264 /* Load from constant pool. */
6265 emit_move_insn (target, const_vec);
6266 return;
6270 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6271 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6273 rtx op[2];
6274 size_t i;
6275 size_t num_elements = all_same ? 1 : 2;
6276 for (i = 0; i < num_elements; i++)
6278 op[i] = XVECEXP (vals, 0, i);
6279 /* Just in case there is a SUBREG with a smaller mode, do a
6280 conversion. */
6281 if (GET_MODE (op[i]) != inner_mode)
6283 rtx tmp = gen_reg_rtx (inner_mode);
6284 convert_move (tmp, op[i], 0);
6285 op[i] = tmp;
6287 /* Allow load with splat double word. */
6288 else if (MEM_P (op[i]))
6290 if (!all_same)
6291 op[i] = force_reg (inner_mode, op[i]);
6293 else if (!REG_P (op[i]))
6294 op[i] = force_reg (inner_mode, op[i]);
6297 if (all_same)
6299 if (mode == V2DFmode)
6300 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6301 else
6302 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6304 else
6306 if (mode == V2DFmode)
6307 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6308 else
6309 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6311 return;
6314 /* Special case initializing vector int if we are on 64-bit systems with
6315 direct move or we have the ISA 3.0 instructions. */
6316 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6317 && TARGET_DIRECT_MOVE_64BIT)
6319 if (all_same)
6321 rtx element0 = XVECEXP (vals, 0, 0);
6322 if (MEM_P (element0))
6323 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6324 else
6325 element0 = force_reg (SImode, element0);
6327 if (TARGET_P9_VECTOR)
6328 emit_insn (gen_vsx_splat_v4si (target, element0));
6329 else
6331 rtx tmp = gen_reg_rtx (DImode);
6332 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6333 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6335 return;
6337 else
6339 rtx elements[4];
6340 size_t i;
6342 for (i = 0; i < 4; i++)
6343 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6345 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6346 elements[2], elements[3]));
6347 return;
6351 /* With single precision floating point on VSX, know that internally single
6352 precision is actually represented as a double, and either make 2 V2DF
6353 vectors, and convert these vectors to single precision, or do one
6354 conversion, and splat the result to the other elements. */
6355 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6357 if (all_same)
6359 rtx element0 = XVECEXP (vals, 0, 0);
6361 if (TARGET_P9_VECTOR)
6363 if (MEM_P (element0))
6364 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6366 emit_insn (gen_vsx_splat_v4sf (target, element0));
6369 else
6371 rtx freg = gen_reg_rtx (V4SFmode);
6372 rtx sreg = force_reg (SFmode, element0);
6373 rtx cvt = (TARGET_XSCVDPSPN
6374 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6375 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6377 emit_insn (cvt);
6378 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6379 const0_rtx));
6382 else
6384 rtx dbl_even = gen_reg_rtx (V2DFmode);
6385 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6386 rtx flt_even = gen_reg_rtx (V4SFmode);
6387 rtx flt_odd = gen_reg_rtx (V4SFmode);
6388 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6389 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6390 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6391 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6393 /* Use VMRGEW if we can instead of doing a permute. */
6394 if (TARGET_P8_VECTOR)
6396 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6397 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6398 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6399 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6400 if (BYTES_BIG_ENDIAN)
6401 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6402 else
6403 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6405 else
6407 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6408 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6409 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6410 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6411 rs6000_expand_extract_even (target, flt_even, flt_odd);
6414 return;
6417 /* Special case initializing vector short/char that are splats if we are on
6418 64-bit systems with direct move. */
6419 if (all_same && TARGET_DIRECT_MOVE_64BIT
6420 && (mode == V16QImode || mode == V8HImode))
6422 rtx op0 = XVECEXP (vals, 0, 0);
6423 rtx di_tmp = gen_reg_rtx (DImode);
6425 if (!REG_P (op0))
6426 op0 = force_reg (GET_MODE_INNER (mode), op0);
6428 if (mode == V16QImode)
6430 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6431 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6432 return;
6435 if (mode == V8HImode)
6437 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6438 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6439 return;
6443 /* Store value to stack temp. Load vector element. Splat. However, splat
6444 of 64-bit items is not supported on Altivec. */
6445 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6447 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6448 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6449 XVECEXP (vals, 0, 0));
6450 x = gen_rtx_UNSPEC (VOIDmode,
6451 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6452 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6453 gen_rtvec (2,
6454 gen_rtx_SET (target, mem),
6455 x)));
6456 x = gen_rtx_VEC_SELECT (inner_mode, target,
6457 gen_rtx_PARALLEL (VOIDmode,
6458 gen_rtvec (1, const0_rtx)));
6459 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6460 return;
6463 /* One field is non-constant. Load constant then overwrite
6464 varying field. */
6465 if (n_var == 1)
6467 rtx copy = copy_rtx (vals);
6469 /* Load constant part of vector, substitute neighboring value for
6470 varying element. */
6471 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6472 rs6000_expand_vector_init (target, copy);
6474 /* Insert variable. */
6475 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6476 return;
6479 /* Construct the vector in memory one field at a time
6480 and load the whole vector. */
6481 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6482 for (i = 0; i < n_elts; i++)
6483 emit_move_insn (adjust_address_nv (mem, inner_mode,
6484 i * GET_MODE_SIZE (inner_mode)),
6485 XVECEXP (vals, 0, i));
6486 emit_move_insn (target, mem);
6489 /* Set field ELT of TARGET to VAL. */
6491 void
6492 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6494 machine_mode mode = GET_MODE (target);
6495 machine_mode inner_mode = GET_MODE_INNER (mode);
6496 rtx reg = gen_reg_rtx (mode);
6497 rtx mask, mem, x;
6498 int width = GET_MODE_SIZE (inner_mode);
6499 int i;
6501 val = force_reg (GET_MODE (val), val);
6503 if (VECTOR_MEM_VSX_P (mode))
6505 rtx insn = NULL_RTX;
6506 rtx elt_rtx = GEN_INT (elt);
6508 if (mode == V2DFmode)
6509 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
6511 else if (mode == V2DImode)
6512 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
6514 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
6516 if (mode == V4SImode)
6517 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
6518 else if (mode == V8HImode)
6519 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
6520 else if (mode == V16QImode)
6521 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
6522 else if (mode == V4SFmode)
6523 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
6526 if (insn)
6528 emit_insn (insn);
6529 return;
6533 /* Simplify setting single element vectors like V1TImode. */
6534 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6536 emit_move_insn (target, gen_lowpart (mode, val));
6537 return;
6540 /* Load single variable value. */
6541 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6542 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6543 x = gen_rtx_UNSPEC (VOIDmode,
6544 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6545 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6546 gen_rtvec (2,
6547 gen_rtx_SET (reg, mem),
6548 x)));
6550 /* Linear sequence. */
6551 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6552 for (i = 0; i < 16; ++i)
6553 XVECEXP (mask, 0, i) = GEN_INT (i);
6555 /* Set permute mask to insert element into target. */
6556 for (i = 0; i < width; ++i)
6557 XVECEXP (mask, 0, elt*width + i)
6558 = GEN_INT (i + 0x10);
6559 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6561 if (BYTES_BIG_ENDIAN)
6562 x = gen_rtx_UNSPEC (mode,
6563 gen_rtvec (3, target, reg,
6564 force_reg (V16QImode, x)),
6565 UNSPEC_VPERM);
6566 else
6568 if (TARGET_P9_VECTOR)
6569 x = gen_rtx_UNSPEC (mode,
6570 gen_rtvec (3, reg, target,
6571 force_reg (V16QImode, x)),
6572 UNSPEC_VPERMR);
6573 else
6575 /* Invert selector. We prefer to generate VNAND on P8 so
6576 that future fusion opportunities can kick in, but must
6577 generate VNOR elsewhere. */
6578 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6579 rtx iorx = (TARGET_P8_VECTOR
6580 ? gen_rtx_IOR (V16QImode, notx, notx)
6581 : gen_rtx_AND (V16QImode, notx, notx));
6582 rtx tmp = gen_reg_rtx (V16QImode);
6583 emit_insn (gen_rtx_SET (tmp, iorx));
6585 /* Permute with operands reversed and adjusted selector. */
6586 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6587 UNSPEC_VPERM);
6591 emit_insn (gen_rtx_SET (target, x));
6594 /* Extract field ELT from VEC into TARGET. */
6596 void
6597 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
6599 machine_mode mode = GET_MODE (vec);
6600 machine_mode inner_mode = GET_MODE_INNER (mode);
6601 rtx mem;
6603 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
6605 switch (mode)
6607 default:
6608 break;
6609 case E_V1TImode:
6610 emit_move_insn (target, gen_lowpart (TImode, vec));
6611 break;
6612 case E_V2DFmode:
6613 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
6614 return;
6615 case E_V2DImode:
6616 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
6617 return;
6618 case E_V4SFmode:
6619 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
6620 return;
6621 case E_V16QImode:
6622 if (TARGET_DIRECT_MOVE_64BIT)
6624 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
6625 return;
6627 else
6628 break;
6629 case E_V8HImode:
6630 if (TARGET_DIRECT_MOVE_64BIT)
6632 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
6633 return;
6635 else
6636 break;
6637 case E_V4SImode:
6638 if (TARGET_DIRECT_MOVE_64BIT)
6640 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
6641 return;
6643 break;
6646 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
6647 && TARGET_DIRECT_MOVE_64BIT)
6649 if (GET_MODE (elt) != DImode)
6651 rtx tmp = gen_reg_rtx (DImode);
6652 convert_move (tmp, elt, 0);
6653 elt = tmp;
6655 else if (!REG_P (elt))
6656 elt = force_reg (DImode, elt);
6658 switch (mode)
6660 case E_V1TImode:
6661 emit_move_insn (target, gen_lowpart (TImode, vec));
6662 return;
6664 case E_V2DFmode:
6665 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
6666 return;
6668 case E_V2DImode:
6669 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
6670 return;
6672 case E_V4SFmode:
6673 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
6674 return;
6676 case E_V4SImode:
6677 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
6678 return;
6680 case E_V8HImode:
6681 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
6682 return;
6684 case E_V16QImode:
6685 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
6686 return;
6688 default:
6689 gcc_unreachable ();
6693 /* Allocate mode-sized buffer. */
6694 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6696 emit_move_insn (mem, vec);
6697 if (CONST_INT_P (elt))
6699 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
6701 /* Add offset to field within buffer matching vector element. */
6702 mem = adjust_address_nv (mem, inner_mode,
6703 modulo_elt * GET_MODE_SIZE (inner_mode));
6704 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6706 else
6708 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
6709 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
6710 rtx new_addr = gen_reg_rtx (Pmode);
6712 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
6713 if (ele_size > 1)
6714 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
6715 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
6716 new_addr = change_address (mem, inner_mode, new_addr);
6717 emit_move_insn (target, new_addr);
6721 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
6722 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
6723 temporary (BASE_TMP) to fixup the address. Return the new memory address
6724 that is valid for reads or writes to a given register (SCALAR_REG). */
6727 rs6000_adjust_vec_address (rtx scalar_reg,
6728 rtx mem,
6729 rtx element,
6730 rtx base_tmp,
6731 machine_mode scalar_mode)
6733 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6734 rtx addr = XEXP (mem, 0);
6735 rtx element_offset;
6736 rtx new_addr;
6737 bool valid_addr_p;
6739 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
6740 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
6742 /* Calculate what we need to add to the address to get the element
6743 address. */
6744 if (CONST_INT_P (element))
6745 element_offset = GEN_INT (INTVAL (element) * scalar_size);
6746 else
6748 int byte_shift = exact_log2 (scalar_size);
6749 gcc_assert (byte_shift >= 0);
6751 if (byte_shift == 0)
6752 element_offset = element;
6754 else
6756 if (TARGET_POWERPC64)
6757 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
6758 else
6759 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
6761 element_offset = base_tmp;
6765 /* Create the new address pointing to the element within the vector. If we
6766 are adding 0, we don't have to change the address. */
6767 if (element_offset == const0_rtx)
6768 new_addr = addr;
6770 /* A simple indirect address can be converted into a reg + offset
6771 address. */
6772 else if (REG_P (addr) || SUBREG_P (addr))
6773 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
6775 /* Optimize D-FORM addresses with constant offset with a constant element, to
6776 include the element offset in the address directly. */
6777 else if (GET_CODE (addr) == PLUS)
6779 rtx op0 = XEXP (addr, 0);
6780 rtx op1 = XEXP (addr, 1);
6781 rtx insn;
6783 gcc_assert (REG_P (op0) || SUBREG_P (op0));
6784 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
6786 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
6787 rtx offset_rtx = GEN_INT (offset);
6789 if (IN_RANGE (offset, -32768, 32767)
6790 && (scalar_size < 8 || (offset & 0x3) == 0))
6791 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
6792 else
6794 emit_move_insn (base_tmp, offset_rtx);
6795 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
6798 else
6800 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
6801 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
6803 /* Note, ADDI requires the register being added to be a base
6804 register. If the register was R0, load it up into the temporary
6805 and do the add. */
6806 if (op1_reg_p
6807 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
6809 insn = gen_add3_insn (base_tmp, op1, element_offset);
6810 gcc_assert (insn != NULL_RTX);
6811 emit_insn (insn);
6814 else if (ele_reg_p
6815 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
6817 insn = gen_add3_insn (base_tmp, element_offset, op1);
6818 gcc_assert (insn != NULL_RTX);
6819 emit_insn (insn);
6822 else
6824 emit_move_insn (base_tmp, op1);
6825 emit_insn (gen_add2_insn (base_tmp, element_offset));
6828 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
6832 else
6834 emit_move_insn (base_tmp, addr);
6835 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6838 /* If we have a PLUS, we need to see whether the particular register class
6839 allows for D-FORM or X-FORM addressing. */
6840 if (GET_CODE (new_addr) == PLUS)
6842 rtx op1 = XEXP (new_addr, 1);
6843 addr_mask_type addr_mask;
6844 unsigned int scalar_regno = reg_or_subregno (scalar_reg);
6846 gcc_assert (HARD_REGISTER_NUM_P (scalar_regno));
6847 if (INT_REGNO_P (scalar_regno))
6848 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
6850 else if (FP_REGNO_P (scalar_regno))
6851 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
6853 else if (ALTIVEC_REGNO_P (scalar_regno))
6854 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
6856 else
6857 gcc_unreachable ();
6859 if (REG_P (op1) || SUBREG_P (op1))
6860 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
6861 else
6862 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
6865 else if (REG_P (new_addr) || SUBREG_P (new_addr))
6866 valid_addr_p = true;
6868 else
6869 valid_addr_p = false;
6871 if (!valid_addr_p)
6873 emit_move_insn (base_tmp, new_addr);
6874 new_addr = base_tmp;
6877 return change_address (mem, scalar_mode, new_addr);
6880 /* Split a variable vec_extract operation into the component instructions. */
6882 void
6883 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
6884 rtx tmp_altivec)
6886 machine_mode mode = GET_MODE (src);
6887 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
6888 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6889 int byte_shift = exact_log2 (scalar_size);
6891 gcc_assert (byte_shift >= 0);
6893 /* If we are given a memory address, optimize to load just the element. We
6894 don't have to adjust the vector element number on little endian
6895 systems. */
6896 if (MEM_P (src))
6898 int num_elements = GET_MODE_NUNITS (mode);
6899 rtx num_ele_m1 = GEN_INT (num_elements - 1);
6901 emit_insn (gen_anddi3 (element, element, num_ele_m1));
6902 gcc_assert (REG_P (tmp_gpr));
6903 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
6904 tmp_gpr, scalar_mode));
6905 return;
6908 else if (REG_P (src) || SUBREG_P (src))
6910 int num_elements = GET_MODE_NUNITS (mode);
6911 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
6912 int bit_shift = 7 - exact_log2 (num_elements);
6913 rtx element2;
6914 unsigned int dest_regno = reg_or_subregno (dest);
6915 unsigned int src_regno = reg_or_subregno (src);
6916 unsigned int element_regno = reg_or_subregno (element);
6918 gcc_assert (REG_P (tmp_gpr));
6920 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
6921 a general purpose register. */
6922 if (TARGET_P9_VECTOR
6923 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
6924 && INT_REGNO_P (dest_regno)
6925 && ALTIVEC_REGNO_P (src_regno)
6926 && INT_REGNO_P (element_regno))
6928 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
6929 rtx element_si = gen_rtx_REG (SImode, element_regno);
6931 if (mode == V16QImode)
6932 emit_insn (BYTES_BIG_ENDIAN
6933 ? gen_vextublx (dest_si, element_si, src)
6934 : gen_vextubrx (dest_si, element_si, src));
6936 else if (mode == V8HImode)
6938 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
6939 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
6940 emit_insn (BYTES_BIG_ENDIAN
6941 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
6942 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
6946 else
6948 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
6949 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
6950 emit_insn (BYTES_BIG_ENDIAN
6951 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
6952 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
6955 return;
6959 gcc_assert (REG_P (tmp_altivec));
6961 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
6962 an XOR, otherwise we need to subtract. The shift amount is so VSLO
6963 will shift the element into the upper position (adding 3 to convert a
6964 byte shift into a bit shift). */
6965 if (scalar_size == 8)
6967 if (!BYTES_BIG_ENDIAN)
6969 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
6970 element2 = tmp_gpr;
6972 else
6973 element2 = element;
6975 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
6976 bit. */
6977 emit_insn (gen_rtx_SET (tmp_gpr,
6978 gen_rtx_AND (DImode,
6979 gen_rtx_ASHIFT (DImode,
6980 element2,
6981 GEN_INT (6)),
6982 GEN_INT (64))));
6984 else
6986 if (!BYTES_BIG_ENDIAN)
6988 rtx num_ele_m1 = GEN_INT (num_elements - 1);
6990 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
6991 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
6992 element2 = tmp_gpr;
6994 else
6995 element2 = element;
6997 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7000 /* Get the value into the lower byte of the Altivec register where VSLO
7001 expects it. */
7002 if (TARGET_P9_VECTOR)
7003 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7004 else if (can_create_pseudo_p ())
7005 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7006 else
7008 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7009 emit_move_insn (tmp_di, tmp_gpr);
7010 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7013 /* Do the VSLO to get the value into the final location. */
7014 switch (mode)
7016 case E_V2DFmode:
7017 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7018 return;
7020 case E_V2DImode:
7021 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7022 return;
7024 case E_V4SFmode:
7026 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7027 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7028 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7029 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7030 tmp_altivec));
7032 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7033 return;
7036 case E_V4SImode:
7037 case E_V8HImode:
7038 case E_V16QImode:
7040 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7041 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7042 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7043 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7044 tmp_altivec));
7045 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7046 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7047 GEN_INT (64 - bits_in_element)));
7048 return;
7051 default:
7052 gcc_unreachable ();
7055 return;
7057 else
7058 gcc_unreachable ();
7061 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7062 selects whether the alignment is abi mandated, optional, or
7063 both abi and optional alignment. */
7065 unsigned int
7066 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7068 if (how != align_opt)
7070 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7071 align = 128;
7074 if (how != align_abi)
7076 if (TREE_CODE (type) == ARRAY_TYPE
7077 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7079 if (align < BITS_PER_WORD)
7080 align = BITS_PER_WORD;
7084 return align;
7087 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7088 instructions simply ignore the low bits; VSX memory instructions
7089 are aligned to 4 or 8 bytes. */
7091 static bool
7092 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7094 return (STRICT_ALIGNMENT
7095 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7096 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7097 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7098 && (int) align < VECTOR_ALIGN (mode)))));
7101 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7103 bool
7104 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7106 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7108 if (computed != 128)
7110 static bool warned;
7111 if (!warned && warn_psabi)
7113 warned = true;
7114 inform (input_location,
7115 "the layout of aggregates containing vectors with"
7116 " %d-byte alignment has changed in GCC 5",
7117 computed / BITS_PER_UNIT);
7120 /* In current GCC there is no special case. */
7121 return false;
7124 return false;
7127 /* AIX increases natural record alignment to doubleword if the first
7128 field is an FP double while the FP fields remain word aligned. */
7130 unsigned int
7131 rs6000_special_round_type_align (tree type, unsigned int computed,
7132 unsigned int specified)
7134 unsigned int align = MAX (computed, specified);
7135 tree field = TYPE_FIELDS (type);
7137 /* Skip all non field decls */
7138 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7139 field = DECL_CHAIN (field);
7141 if (field != NULL && field != type)
7143 type = TREE_TYPE (field);
7144 while (TREE_CODE (type) == ARRAY_TYPE)
7145 type = TREE_TYPE (type);
7147 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7148 align = MAX (align, 64);
7151 return align;
7154 /* Darwin increases record alignment to the natural alignment of
7155 the first field. */
7157 unsigned int
7158 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7159 unsigned int specified)
7161 unsigned int align = MAX (computed, specified);
7163 if (TYPE_PACKED (type))
7164 return align;
7166 /* Find the first field, looking down into aggregates. */
7167 do {
7168 tree field = TYPE_FIELDS (type);
7169 /* Skip all non field decls */
7170 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7171 field = DECL_CHAIN (field);
7172 if (! field)
7173 break;
7174 /* A packed field does not contribute any extra alignment. */
7175 if (DECL_PACKED (field))
7176 return align;
7177 type = TREE_TYPE (field);
7178 while (TREE_CODE (type) == ARRAY_TYPE)
7179 type = TREE_TYPE (type);
7180 } while (AGGREGATE_TYPE_P (type));
7182 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7183 align = MAX (align, TYPE_ALIGN (type));
7185 return align;
7188 /* Return 1 for an operand in small memory on V.4/eabi. */
7191 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7192 machine_mode mode ATTRIBUTE_UNUSED)
7194 #if TARGET_ELF
7195 rtx sym_ref;
7197 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7198 return 0;
7200 if (DEFAULT_ABI != ABI_V4)
7201 return 0;
7203 if (SYMBOL_REF_P (op))
7204 sym_ref = op;
7206 else if (GET_CODE (op) != CONST
7207 || GET_CODE (XEXP (op, 0)) != PLUS
7208 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
7209 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
7210 return 0;
7212 else
7214 rtx sum = XEXP (op, 0);
7215 HOST_WIDE_INT summand;
7217 /* We have to be careful here, because it is the referenced address
7218 that must be 32k from _SDA_BASE_, not just the symbol. */
7219 summand = INTVAL (XEXP (sum, 1));
7220 if (summand < 0 || summand > g_switch_value)
7221 return 0;
7223 sym_ref = XEXP (sum, 0);
7226 return SYMBOL_REF_SMALL_P (sym_ref);
7227 #else
7228 return 0;
7229 #endif
7232 /* Return true if either operand is a general purpose register. */
7234 bool
7235 gpr_or_gpr_p (rtx op0, rtx op1)
7237 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7238 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7241 /* Return true if this is a move direct operation between GPR registers and
7242 floating point/VSX registers. */
7244 bool
7245 direct_move_p (rtx op0, rtx op1)
7247 if (!REG_P (op0) || !REG_P (op1))
7248 return false;
7250 if (!TARGET_DIRECT_MOVE)
7251 return false;
7253 int regno0 = REGNO (op0);
7254 int regno1 = REGNO (op1);
7255 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
7256 return false;
7258 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
7259 return true;
7261 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
7262 return true;
7264 return false;
7267 /* Return true if the ADDR is an acceptable address for a quad memory
7268 operation of mode MODE (either LQ/STQ for general purpose registers, or
7269 LXV/STXV for vector registers under ISA 3.0. GPR_P is true if this address
7270 is intended for LQ/STQ. If it is false, the address is intended for the ISA
7271 3.0 LXV/STXV instruction. */
7273 bool
7274 quad_address_p (rtx addr, machine_mode mode, bool strict)
7276 rtx op0, op1;
7278 if (GET_MODE_SIZE (mode) != 16)
7279 return false;
7281 if (legitimate_indirect_address_p (addr, strict))
7282 return true;
7284 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7285 return false;
7287 /* Is this a valid prefixed address? If the bottom four bits of the offset
7288 are non-zero, we could use a prefixed instruction (which does not have the
7289 DQ-form constraint that the traditional instruction had) instead of
7290 forcing the unaligned offset to a GPR. */
7291 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
7292 return true;
7294 if (GET_CODE (addr) != PLUS)
7295 return false;
7297 op0 = XEXP (addr, 0);
7298 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7299 return false;
7301 op1 = XEXP (addr, 1);
7302 if (!CONST_INT_P (op1))
7303 return false;
7305 return quad_address_offset_p (INTVAL (op1));
7308 /* Return true if this is a load or store quad operation. This function does
7309 not handle the atomic quad memory instructions. */
7311 bool
7312 quad_load_store_p (rtx op0, rtx op1)
7314 bool ret;
7316 if (!TARGET_QUAD_MEMORY)
7317 ret = false;
7319 else if (REG_P (op0) && MEM_P (op1))
7320 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7321 && quad_memory_operand (op1, GET_MODE (op1))
7322 && !reg_overlap_mentioned_p (op0, op1));
7324 else if (MEM_P (op0) && REG_P (op1))
7325 ret = (quad_memory_operand (op0, GET_MODE (op0))
7326 && quad_int_reg_operand (op1, GET_MODE (op1)));
7328 else
7329 ret = false;
7331 if (TARGET_DEBUG_ADDR)
7333 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7334 ret ? "true" : "false");
7335 debug_rtx (gen_rtx_SET (op0, op1));
7338 return ret;
7341 /* Given an address, return a constant offset term if one exists. */
7343 static rtx
7344 address_offset (rtx op)
7346 if (GET_CODE (op) == PRE_INC
7347 || GET_CODE (op) == PRE_DEC)
7348 op = XEXP (op, 0);
7349 else if (GET_CODE (op) == PRE_MODIFY
7350 || GET_CODE (op) == LO_SUM)
7351 op = XEXP (op, 1);
7353 if (GET_CODE (op) == CONST)
7354 op = XEXP (op, 0);
7356 if (GET_CODE (op) == PLUS)
7357 op = XEXP (op, 1);
7359 if (CONST_INT_P (op))
7360 return op;
7362 return NULL_RTX;
7365 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
7366 the mode. If we can't find (or don't know) the alignment of the symbol
7367 we assume (optimistically) that it's sufficiently aligned [??? maybe we
7368 should be pessimistic]. Offsets are validated in the same way as for
7369 reg + offset. */
7370 static bool
7371 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
7373 /* We should not get here with this. */
7374 gcc_checking_assert (! mode_supports_dq_form (mode));
7376 if (GET_CODE (x) == CONST)
7377 x = XEXP (x, 0);
7379 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
7380 x = XVECEXP (x, 0, 0);
7382 rtx sym = NULL_RTX;
7383 unsigned HOST_WIDE_INT offset = 0;
7385 if (GET_CODE (x) == PLUS)
7387 sym = XEXP (x, 0);
7388 if (! SYMBOL_REF_P (sym))
7389 return false;
7390 if (!CONST_INT_P (XEXP (x, 1)))
7391 return false;
7392 offset = INTVAL (XEXP (x, 1));
7394 else if (SYMBOL_REF_P (x))
7395 sym = x;
7396 else if (CONST_INT_P (x))
7397 offset = INTVAL (x);
7398 else if (GET_CODE (x) == LABEL_REF)
7399 offset = 0; // We assume code labels are Pmode aligned
7400 else
7401 return false; // not sure what we have here.
7403 /* If we don't know the alignment of the thing to which the symbol refers,
7404 we assume optimistically it is "enough".
7405 ??? maybe we should be pessimistic instead. */
7406 unsigned align = 0;
7408 if (sym)
7410 tree decl = SYMBOL_REF_DECL (sym);
7411 #if TARGET_MACHO
7412 if (MACHO_SYMBOL_INDIRECTION_P (sym))
7413 /* The decl in an indirection symbol is the original one, which might
7414 be less aligned than the indirection. Our indirections are always
7415 pointer-aligned. */
7417 else
7418 #endif
7419 if (decl && DECL_ALIGN (decl))
7420 align = DECL_ALIGN_UNIT (decl);
7423 unsigned int extra = 0;
7424 switch (mode)
7426 case E_DFmode:
7427 case E_DDmode:
7428 case E_DImode:
7429 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7430 addressing. */
7431 if (VECTOR_MEM_VSX_P (mode))
7432 return false;
7434 if (!TARGET_POWERPC64)
7435 extra = 4;
7436 else if ((offset & 3) || (align & 3))
7437 return false;
7438 break;
7440 case E_TFmode:
7441 case E_IFmode:
7442 case E_KFmode:
7443 case E_TDmode:
7444 case E_TImode:
7445 case E_PTImode:
7446 extra = 8;
7447 if (!TARGET_POWERPC64)
7448 extra = 12;
7449 else if ((offset & 3) || (align & 3))
7450 return false;
7451 break;
7453 default:
7454 break;
7457 /* We only care if the access(es) would cause a change to the high part. */
7458 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7459 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7462 /* Return true if the MEM operand is a memory operand suitable for use
7463 with a (full width, possibly multiple) gpr load/store. On
7464 powerpc64 this means the offset must be divisible by 4.
7465 Implements 'Y' constraint.
7467 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7468 a constraint function we know the operand has satisfied a suitable
7469 memory predicate.
7471 Offsetting a lo_sum should not be allowed, except where we know by
7472 alignment that a 32k boundary is not crossed. Note that by
7473 "offsetting" here we mean a further offset to access parts of the
7474 MEM. It's fine to have a lo_sum where the inner address is offset
7475 from a sym, since the same sym+offset will appear in the high part
7476 of the address calculation. */
7478 bool
7479 mem_operand_gpr (rtx op, machine_mode mode)
7481 unsigned HOST_WIDE_INT offset;
7482 int extra;
7483 rtx addr = XEXP (op, 0);
7485 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
7486 if (TARGET_UPDATE
7487 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
7488 && mode_supports_pre_incdec_p (mode)
7489 && legitimate_indirect_address_p (XEXP (addr, 0), false))
7490 return true;
7492 /* Allow prefixed instructions if supported. If the bottom two bits of the
7493 offset are non-zero, we could use a prefixed instruction (which does not
7494 have the DS-form constraint that the traditional instruction had) instead
7495 of forcing the unaligned offset to a GPR. */
7496 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
7497 return true;
7499 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
7500 really OK. Doing this early avoids teaching all the other machinery
7501 about them. */
7502 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
7503 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
7505 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
7506 if (!rs6000_offsettable_memref_p (op, mode, false))
7507 return false;
7509 op = address_offset (addr);
7510 if (op == NULL_RTX)
7511 return true;
7513 offset = INTVAL (op);
7514 if (TARGET_POWERPC64 && (offset & 3) != 0)
7515 return false;
7517 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7518 if (extra < 0)
7519 extra = 0;
7521 if (GET_CODE (addr) == LO_SUM)
7522 /* For lo_sum addresses, we must allow any offset except one that
7523 causes a wrap, so test only the low 16 bits. */
7524 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7526 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7529 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
7530 enforce an offset divisible by 4 even for 32-bit. */
7532 bool
7533 mem_operand_ds_form (rtx op, machine_mode mode)
7535 unsigned HOST_WIDE_INT offset;
7536 int extra;
7537 rtx addr = XEXP (op, 0);
7539 /* Allow prefixed instructions if supported. If the bottom two bits of the
7540 offset are non-zero, we could use a prefixed instruction (which does not
7541 have the DS-form constraint that the traditional instruction had) instead
7542 of forcing the unaligned offset to a GPR. */
7543 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
7544 return true;
7546 if (!offsettable_address_p (false, mode, addr))
7547 return false;
7549 op = address_offset (addr);
7550 if (op == NULL_RTX)
7551 return true;
7553 offset = INTVAL (op);
7554 if ((offset & 3) != 0)
7555 return false;
7557 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7558 if (extra < 0)
7559 extra = 0;
7561 if (GET_CODE (addr) == LO_SUM)
7562 /* For lo_sum addresses, we must allow any offset except one that
7563 causes a wrap, so test only the low 16 bits. */
7564 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7566 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7569 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7571 static bool
7572 reg_offset_addressing_ok_p (machine_mode mode)
7574 switch (mode)
7576 case E_V16QImode:
7577 case E_V8HImode:
7578 case E_V4SFmode:
7579 case E_V4SImode:
7580 case E_V2DFmode:
7581 case E_V2DImode:
7582 case E_V1TImode:
7583 case E_TImode:
7584 case E_TFmode:
7585 case E_KFmode:
7586 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7587 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7588 a vector mode, if we want to use the VSX registers to move it around,
7589 we need to restrict ourselves to reg+reg addressing. Similarly for
7590 IEEE 128-bit floating point that is passed in a single vector
7591 register. */
7592 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7593 return mode_supports_dq_form (mode);
7594 break;
7596 case E_SDmode:
7597 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7598 addressing for the LFIWZX and STFIWX instructions. */
7599 if (TARGET_NO_SDMODE_STACK)
7600 return false;
7601 break;
7603 default:
7604 break;
7607 return true;
7610 static bool
7611 virtual_stack_registers_memory_p (rtx op)
7613 int regnum;
7615 if (REG_P (op))
7616 regnum = REGNO (op);
7618 else if (GET_CODE (op) == PLUS
7619 && REG_P (XEXP (op, 0))
7620 && CONST_INT_P (XEXP (op, 1)))
7621 regnum = REGNO (XEXP (op, 0));
7623 else
7624 return false;
7626 return (regnum >= FIRST_VIRTUAL_REGISTER
7627 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7630 /* Return true if a MODE sized memory accesses to OP plus OFFSET
7631 is known to not straddle a 32k boundary. This function is used
7632 to determine whether -mcmodel=medium code can use TOC pointer
7633 relative addressing for OP. This means the alignment of the TOC
7634 pointer must also be taken into account, and unfortunately that is
7635 only 8 bytes. */
7637 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7638 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7639 #endif
7641 static bool
7642 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7643 machine_mode mode)
7645 tree decl;
7646 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7648 if (!SYMBOL_REF_P (op))
7649 return false;
7651 /* ISA 3.0 vector d-form addressing is restricted, don't allow
7652 SYMBOL_REF. */
7653 if (mode_supports_dq_form (mode))
7654 return false;
7656 dsize = GET_MODE_SIZE (mode);
7657 decl = SYMBOL_REF_DECL (op);
7658 if (!decl)
7660 if (dsize == 0)
7661 return false;
7663 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7664 replacing memory addresses with an anchor plus offset. We
7665 could find the decl by rummaging around in the block->objects
7666 VEC for the given offset but that seems like too much work. */
7667 dalign = BITS_PER_UNIT;
7668 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7669 && SYMBOL_REF_ANCHOR_P (op)
7670 && SYMBOL_REF_BLOCK (op) != NULL)
7672 struct object_block *block = SYMBOL_REF_BLOCK (op);
7674 dalign = block->alignment;
7675 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7677 else if (CONSTANT_POOL_ADDRESS_P (op))
7679 /* It would be nice to have get_pool_align().. */
7680 machine_mode cmode = get_pool_mode (op);
7682 dalign = GET_MODE_ALIGNMENT (cmode);
7685 else if (DECL_P (decl))
7687 dalign = DECL_ALIGN (decl);
7689 if (dsize == 0)
7691 /* Allow BLKmode when the entire object is known to not
7692 cross a 32k boundary. */
7693 if (!DECL_SIZE_UNIT (decl))
7694 return false;
7696 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7697 return false;
7699 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7700 if (dsize > 32768)
7701 return false;
7703 dalign /= BITS_PER_UNIT;
7704 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7705 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7706 return dalign >= dsize;
7709 else
7710 gcc_unreachable ();
7712 /* Find how many bits of the alignment we know for this access. */
7713 dalign /= BITS_PER_UNIT;
7714 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7715 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7716 mask = dalign - 1;
7717 lsb = offset & -offset;
7718 mask &= lsb - 1;
7719 dalign = mask + 1;
7721 return dalign >= dsize;
7724 static bool
7725 constant_pool_expr_p (rtx op)
7727 rtx base, offset;
7729 split_const (op, &base, &offset);
7730 return (SYMBOL_REF_P (base)
7731 && CONSTANT_POOL_ADDRESS_P (base)
7732 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7735 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
7736 use that as the register to put the HIGH value into if register allocation
7737 is already done. */
7740 create_TOC_reference (rtx symbol, rtx largetoc_reg)
7742 rtx tocrel, tocreg, hi;
7744 gcc_assert (TARGET_TOC);
7746 if (TARGET_DEBUG_ADDR)
7748 if (SYMBOL_REF_P (symbol))
7749 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
7750 XSTR (symbol, 0));
7751 else
7753 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
7754 GET_RTX_NAME (GET_CODE (symbol)));
7755 debug_rtx (symbol);
7759 if (!can_create_pseudo_p ())
7760 df_set_regs_ever_live (TOC_REGISTER, true);
7762 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
7763 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
7764 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
7765 return tocrel;
7767 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
7768 if (largetoc_reg != NULL)
7770 emit_move_insn (largetoc_reg, hi);
7771 hi = largetoc_reg;
7773 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
7776 /* These are only used to pass through from print_operand/print_operand_address
7777 to rs6000_output_addr_const_extra over the intervening function
7778 output_addr_const which is not target code. */
7779 static const_rtx tocrel_base_oac, tocrel_offset_oac;
7781 /* Return true if OP is a toc pointer relative address (the output
7782 of create_TOC_reference). If STRICT, do not match non-split
7783 -mcmodel=large/medium toc pointer relative addresses. If the pointers
7784 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
7785 TOCREL_OFFSET_RET respectively. */
7787 bool
7788 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
7789 const_rtx *tocrel_offset_ret)
7791 if (!TARGET_TOC)
7792 return false;
7794 if (TARGET_CMODEL != CMODEL_SMALL)
7796 /* When strict ensure we have everything tidy. */
7797 if (strict
7798 && !(GET_CODE (op) == LO_SUM
7799 && REG_P (XEXP (op, 0))
7800 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
7801 return false;
7803 /* When not strict, allow non-split TOC addresses and also allow
7804 (lo_sum (high ..)) TOC addresses created during reload. */
7805 if (GET_CODE (op) == LO_SUM)
7806 op = XEXP (op, 1);
7809 const_rtx tocrel_base = op;
7810 const_rtx tocrel_offset = const0_rtx;
7812 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7814 tocrel_base = XEXP (op, 0);
7815 tocrel_offset = XEXP (op, 1);
7818 if (tocrel_base_ret)
7819 *tocrel_base_ret = tocrel_base;
7820 if (tocrel_offset_ret)
7821 *tocrel_offset_ret = tocrel_offset;
7823 return (GET_CODE (tocrel_base) == UNSPEC
7824 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
7825 && REG_P (XVECEXP (tocrel_base, 0, 1))
7826 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
7829 /* Return true if X is a constant pool address, and also for cmodel=medium
7830 if X is a toc-relative address known to be offsettable within MODE. */
7832 bool
7833 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7834 bool strict)
7836 const_rtx tocrel_base, tocrel_offset;
7837 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
7838 && (TARGET_CMODEL != CMODEL_MEDIUM
7839 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7840 || mode == QImode
7841 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7842 INTVAL (tocrel_offset), mode)));
7845 static bool
7846 legitimate_small_data_p (machine_mode mode, rtx x)
7848 return (DEFAULT_ABI == ABI_V4
7849 && !flag_pic && !TARGET_TOC
7850 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
7851 && small_data_operand (x, mode));
7854 bool
7855 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7856 bool strict, bool worst_case)
7858 unsigned HOST_WIDE_INT offset;
7859 unsigned int extra;
7861 if (GET_CODE (x) != PLUS)
7862 return false;
7863 if (!REG_P (XEXP (x, 0)))
7864 return false;
7865 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7866 return false;
7867 if (mode_supports_dq_form (mode))
7868 return quad_address_p (x, mode, strict);
7869 if (!reg_offset_addressing_ok_p (mode))
7870 return virtual_stack_registers_memory_p (x);
7871 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7872 return true;
7873 if (!CONST_INT_P (XEXP (x, 1)))
7874 return false;
7876 offset = INTVAL (XEXP (x, 1));
7877 extra = 0;
7878 switch (mode)
7880 case E_DFmode:
7881 case E_DDmode:
7882 case E_DImode:
7883 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7884 addressing. */
7885 if (VECTOR_MEM_VSX_P (mode))
7886 return false;
7888 if (!worst_case)
7889 break;
7890 if (!TARGET_POWERPC64)
7891 extra = 4;
7892 else if (offset & 3)
7893 return false;
7894 break;
7896 case E_TFmode:
7897 case E_IFmode:
7898 case E_KFmode:
7899 case E_TDmode:
7900 case E_TImode:
7901 case E_PTImode:
7902 extra = 8;
7903 if (!worst_case)
7904 break;
7905 if (!TARGET_POWERPC64)
7906 extra = 12;
7907 else if (offset & 3)
7908 return false;
7909 break;
7911 default:
7912 break;
7915 if (TARGET_PREFIXED_ADDR)
7916 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
7917 else
7918 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7921 bool
7922 legitimate_indexed_address_p (rtx x, int strict)
7924 rtx op0, op1;
7926 if (GET_CODE (x) != PLUS)
7927 return false;
7929 op0 = XEXP (x, 0);
7930 op1 = XEXP (x, 1);
7932 return (REG_P (op0) && REG_P (op1)
7933 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
7934 && INT_REG_OK_FOR_INDEX_P (op1, strict))
7935 || (INT_REG_OK_FOR_BASE_P (op1, strict)
7936 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
7939 bool
7940 avoiding_indexed_address_p (machine_mode mode)
7942 /* Avoid indexed addressing for modes that have non-indexed
7943 load/store instruction forms. */
7944 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
7947 bool
7948 legitimate_indirect_address_p (rtx x, int strict)
7950 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
7953 bool
7954 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
7956 if (!TARGET_MACHO || !flag_pic
7957 || mode != SImode || !MEM_P (x))
7958 return false;
7959 x = XEXP (x, 0);
7961 if (GET_CODE (x) != LO_SUM)
7962 return false;
7963 if (!REG_P (XEXP (x, 0)))
7964 return false;
7965 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
7966 return false;
7967 x = XEXP (x, 1);
7969 return CONSTANT_P (x);
7972 static bool
7973 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
7975 if (GET_CODE (x) != LO_SUM)
7976 return false;
7977 if (!REG_P (XEXP (x, 0)))
7978 return false;
7979 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7980 return false;
7981 /* quad word addresses are restricted, and we can't use LO_SUM. */
7982 if (mode_supports_dq_form (mode))
7983 return false;
7984 x = XEXP (x, 1);
7986 if (TARGET_ELF || TARGET_MACHO)
7988 bool large_toc_ok;
7990 if (DEFAULT_ABI == ABI_V4 && flag_pic)
7991 return false;
7992 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
7993 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
7994 recognizes some LO_SUM addresses as valid although this
7995 function says opposite. In most cases, LRA through different
7996 transformations can generate correct code for address reloads.
7997 It cannot manage only some LO_SUM cases. So we need to add
7998 code here saying that some addresses are still valid. */
7999 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8000 && small_toc_ref (x, VOIDmode));
8001 if (TARGET_TOC && ! large_toc_ok)
8002 return false;
8003 if (GET_MODE_NUNITS (mode) != 1)
8004 return false;
8005 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8006 && !(/* ??? Assume floating point reg based on mode? */
8007 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8008 return false;
8010 return CONSTANT_P (x) || large_toc_ok;
8013 return false;
8017 /* Try machine-dependent ways of modifying an illegitimate address
8018 to be legitimate. If we find one, return the new, valid address.
8019 This is used from only one place: `memory_address' in explow.c.
8021 OLDX is the address as it was before break_out_memory_refs was
8022 called. In some cases it is useful to look at this to decide what
8023 needs to be done.
8025 It is always safe for this function to do nothing. It exists to
8026 recognize opportunities to optimize the output.
8028 On RS/6000, first check for the sum of a register with a constant
8029 integer that is out of range. If so, generate code to add the
8030 constant with the low-order 16 bits masked to the register and force
8031 this result into another register (this can be done with `cau').
8032 Then generate an address of REG+(CONST&0xffff), allowing for the
8033 possibility of bit 16 being a one.
8035 Then check for the sum of a register and something not constant, try to
8036 load the other things into a register and return the sum. */
8038 static rtx
8039 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8040 machine_mode mode)
8042 unsigned int extra;
8044 if (!reg_offset_addressing_ok_p (mode)
8045 || mode_supports_dq_form (mode))
8047 if (virtual_stack_registers_memory_p (x))
8048 return x;
8050 /* In theory we should not be seeing addresses of the form reg+0,
8051 but just in case it is generated, optimize it away. */
8052 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8053 return force_reg (Pmode, XEXP (x, 0));
8055 /* For TImode with load/store quad, restrict addresses to just a single
8056 pointer, so it works with both GPRs and VSX registers. */
8057 /* Make sure both operands are registers. */
8058 else if (GET_CODE (x) == PLUS
8059 && (mode != TImode || !TARGET_VSX))
8060 return gen_rtx_PLUS (Pmode,
8061 force_reg (Pmode, XEXP (x, 0)),
8062 force_reg (Pmode, XEXP (x, 1)));
8063 else
8064 return force_reg (Pmode, x);
8066 if (SYMBOL_REF_P (x))
8068 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8069 if (model != 0)
8070 return rs6000_legitimize_tls_address (x, model);
8073 extra = 0;
8074 switch (mode)
8076 case E_TFmode:
8077 case E_TDmode:
8078 case E_TImode:
8079 case E_PTImode:
8080 case E_IFmode:
8081 case E_KFmode:
8082 /* As in legitimate_offset_address_p we do not assume
8083 worst-case. The mode here is just a hint as to the registers
8084 used. A TImode is usually in gprs, but may actually be in
8085 fprs. Leave worst-case scenario for reload to handle via
8086 insn constraints. PTImode is only GPRs. */
8087 extra = 8;
8088 break;
8089 default:
8090 break;
8093 if (GET_CODE (x) == PLUS
8094 && REG_P (XEXP (x, 0))
8095 && CONST_INT_P (XEXP (x, 1))
8096 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8097 >= 0x10000 - extra))
8099 HOST_WIDE_INT high_int, low_int;
8100 rtx sum;
8101 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8102 if (low_int >= 0x8000 - extra)
8103 low_int = 0;
8104 high_int = INTVAL (XEXP (x, 1)) - low_int;
8105 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8106 GEN_INT (high_int)), 0);
8107 return plus_constant (Pmode, sum, low_int);
8109 else if (GET_CODE (x) == PLUS
8110 && REG_P (XEXP (x, 0))
8111 && !CONST_INT_P (XEXP (x, 1))
8112 && GET_MODE_NUNITS (mode) == 1
8113 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8114 || (/* ??? Assume floating point reg based on mode? */
8115 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8116 && !avoiding_indexed_address_p (mode))
8118 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8119 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8121 else if ((TARGET_ELF
8122 #if TARGET_MACHO
8123 || !MACHO_DYNAMIC_NO_PIC_P
8124 #endif
8126 && TARGET_32BIT
8127 && TARGET_NO_TOC_OR_PCREL
8128 && !flag_pic
8129 && !CONST_INT_P (x)
8130 && !CONST_WIDE_INT_P (x)
8131 && !CONST_DOUBLE_P (x)
8132 && CONSTANT_P (x)
8133 && GET_MODE_NUNITS (mode) == 1
8134 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8135 || (/* ??? Assume floating point reg based on mode? */
8136 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8138 rtx reg = gen_reg_rtx (Pmode);
8139 if (TARGET_ELF)
8140 emit_insn (gen_elf_high (reg, x));
8141 else
8142 emit_insn (gen_macho_high (Pmode, reg, x));
8143 return gen_rtx_LO_SUM (Pmode, reg, x);
8145 else if (TARGET_TOC
8146 && SYMBOL_REF_P (x)
8147 && constant_pool_expr_p (x)
8148 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8149 return create_TOC_reference (x, NULL_RTX);
8150 else
8151 return x;
8154 /* Debug version of rs6000_legitimize_address. */
8155 static rtx
8156 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8158 rtx ret;
8159 rtx_insn *insns;
8161 start_sequence ();
8162 ret = rs6000_legitimize_address (x, oldx, mode);
8163 insns = get_insns ();
8164 end_sequence ();
8166 if (ret != x)
8168 fprintf (stderr,
8169 "\nrs6000_legitimize_address: mode %s, old code %s, "
8170 "new code %s, modified\n",
8171 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8172 GET_RTX_NAME (GET_CODE (ret)));
8174 fprintf (stderr, "Original address:\n");
8175 debug_rtx (x);
8177 fprintf (stderr, "oldx:\n");
8178 debug_rtx (oldx);
8180 fprintf (stderr, "New address:\n");
8181 debug_rtx (ret);
8183 if (insns)
8185 fprintf (stderr, "Insns added:\n");
8186 debug_rtx_list (insns, 20);
8189 else
8191 fprintf (stderr,
8192 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8193 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8195 debug_rtx (x);
8198 if (insns)
8199 emit_insn (insns);
8201 return ret;
8204 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8205 We need to emit DTP-relative relocations. */
8207 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8208 static void
8209 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8211 switch (size)
8213 case 4:
8214 fputs ("\t.long\t", file);
8215 break;
8216 case 8:
8217 fputs (DOUBLE_INT_ASM_OP, file);
8218 break;
8219 default:
8220 gcc_unreachable ();
8222 output_addr_const (file, x);
8223 if (TARGET_ELF)
8224 fputs ("@dtprel+0x8000", file);
8225 else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8227 switch (SYMBOL_REF_TLS_MODEL (x))
8229 case 0:
8230 break;
8231 case TLS_MODEL_LOCAL_EXEC:
8232 fputs ("@le", file);
8233 break;
8234 case TLS_MODEL_INITIAL_EXEC:
8235 fputs ("@ie", file);
8236 break;
8237 case TLS_MODEL_GLOBAL_DYNAMIC:
8238 case TLS_MODEL_LOCAL_DYNAMIC:
8239 fputs ("@m", file);
8240 break;
8241 default:
8242 gcc_unreachable ();
8247 /* Return true if X is a symbol that refers to real (rather than emulated)
8248 TLS. */
8250 static bool
8251 rs6000_real_tls_symbol_ref_p (rtx x)
8253 return (SYMBOL_REF_P (x)
8254 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8257 /* In the name of slightly smaller debug output, and to cater to
8258 general assembler lossage, recognize various UNSPEC sequences
8259 and turn them back into a direct symbol reference. */
8261 static rtx
8262 rs6000_delegitimize_address (rtx orig_x)
8264 rtx x, y, offset;
8266 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8267 orig_x = XVECEXP (orig_x, 0, 0);
8269 orig_x = delegitimize_mem_from_attrs (orig_x);
8271 x = orig_x;
8272 if (MEM_P (x))
8273 x = XEXP (x, 0);
8275 y = x;
8276 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
8277 y = XEXP (y, 1);
8279 offset = NULL_RTX;
8280 if (GET_CODE (y) == PLUS
8281 && GET_MODE (y) == Pmode
8282 && CONST_INT_P (XEXP (y, 1)))
8284 offset = XEXP (y, 1);
8285 y = XEXP (y, 0);
8288 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8290 y = XVECEXP (y, 0, 0);
8292 #ifdef HAVE_AS_TLS
8293 /* Do not associate thread-local symbols with the original
8294 constant pool symbol. */
8295 if (TARGET_XCOFF
8296 && SYMBOL_REF_P (y)
8297 && CONSTANT_POOL_ADDRESS_P (y)
8298 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8299 return orig_x;
8300 #endif
8302 if (offset != NULL_RTX)
8303 y = gen_rtx_PLUS (Pmode, y, offset);
8304 if (!MEM_P (orig_x))
8305 return y;
8306 else
8307 return replace_equiv_address_nv (orig_x, y);
8310 if (TARGET_MACHO
8311 && GET_CODE (orig_x) == LO_SUM
8312 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8314 y = XEXP (XEXP (orig_x, 1), 0);
8315 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8316 return XVECEXP (y, 0, 0);
8319 return orig_x;
8322 /* Return true if X shouldn't be emitted into the debug info.
8323 The linker doesn't like .toc section references from
8324 .debug_* sections, so reject .toc section symbols. */
8326 static bool
8327 rs6000_const_not_ok_for_debug_p (rtx x)
8329 if (GET_CODE (x) == UNSPEC)
8330 return true;
8331 if (SYMBOL_REF_P (x)
8332 && CONSTANT_POOL_ADDRESS_P (x))
8334 rtx c = get_pool_constant (x);
8335 machine_mode cmode = get_pool_mode (x);
8336 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8337 return true;
8340 return false;
8343 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8345 static bool
8346 rs6000_legitimate_combined_insn (rtx_insn *insn)
8348 int icode = INSN_CODE (insn);
8350 /* Reject creating doloop insns. Combine should not be allowed
8351 to create these for a number of reasons:
8352 1) In a nested loop, if combine creates one of these in an
8353 outer loop and the register allocator happens to allocate ctr
8354 to the outer loop insn, then the inner loop can't use ctr.
8355 Inner loops ought to be more highly optimized.
8356 2) Combine often wants to create one of these from what was
8357 originally a three insn sequence, first combining the three
8358 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8359 allocated ctr, the splitter takes use back to the three insn
8360 sequence. It's better to stop combine at the two insn
8361 sequence.
8362 3) Faced with not being able to allocate ctr for ctrsi/crtdi
8363 insns, the register allocator sometimes uses floating point
8364 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8365 jump insn and output reloads are not implemented for jumps,
8366 the ctrsi/ctrdi splitters need to handle all possible cases.
8367 That's a pain, and it gets to be seriously difficult when a
8368 splitter that runs after reload needs memory to transfer from
8369 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8370 for the difficult case. It's better to not create problems
8371 in the first place. */
8372 if (icode != CODE_FOR_nothing
8373 && (icode == CODE_FOR_bdz_si
8374 || icode == CODE_FOR_bdz_di
8375 || icode == CODE_FOR_bdnz_si
8376 || icode == CODE_FOR_bdnz_di
8377 || icode == CODE_FOR_bdztf_si
8378 || icode == CODE_FOR_bdztf_di
8379 || icode == CODE_FOR_bdnztf_si
8380 || icode == CODE_FOR_bdnztf_di))
8381 return false;
8383 return true;
8386 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8388 static GTY(()) rtx rs6000_tls_symbol;
8389 static rtx
8390 rs6000_tls_get_addr (void)
8392 if (!rs6000_tls_symbol)
8393 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8395 return rs6000_tls_symbol;
8398 /* Construct the SYMBOL_REF for TLS GOT references. */
8400 static GTY(()) rtx rs6000_got_symbol;
8402 rs6000_got_sym (void)
8404 if (!rs6000_got_symbol)
8406 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8407 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8408 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8411 return rs6000_got_symbol;
8414 /* AIX Thread-Local Address support. */
/* ADDR is a thread-local SYMBOL_REF; MODEL is its TLS access model.
   Emits the address computation and returns a register (or constant-pool
   MEM on the early-exit path) holding the thread-local address.
   NOTE(review): this listing retains the upstream blob's line numbers and
   the extraction elided brace-only lines; do not re-indent from here.  */
8416 static rtx
8417 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8419 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8420 const char *name;
8421 char *tlsname;
8423 name = XSTR (addr, 0);
8424 /* Append TLS CSECT qualifier, unless the symbol already is qualified
8425 or the symbol will be in TLS private data section. */
8426 if (name[strlen (name) - 1] != ']'
8427 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8428 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
/* "[UL]" marks an uninitialized (BSS-style) thread-local csect,
   "[TL]" an initialized one.  */
8430 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8431 strcpy (tlsname, name);
8432 strcat (tlsname,
8433 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8434 tlsaddr = copy_rtx (addr);
8435 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8437 else
8438 tlsaddr = addr;
8440 /* Place addr into TOC constant pool. */
8441 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8443 /* Output the TOC entry and create the MEM referencing the value. */
8444 if (constant_pool_expr_p (XEXP (sym, 0))
8445 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8447 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8448 mem = gen_const_mem (Pmode, tocref);
8449 set_mem_alias_set (mem, get_TOC_alias_set ());
8451 else
/* Not a TOC-eligible pool entry: fall back to the raw pool MEM.  */
8452 return sym;
8454 /* Use global-dynamic for local-dynamic. */
8455 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8456 || model == TLS_MODEL_LOCAL_DYNAMIC)
8458 /* Create new TOC reference for @m symbol. */
/* The module-handle symbol is derived by rewriting the pool label's
   "*LC" prefix to "*LCM" (hence the name + 3 below).  */
8459 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8460 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8461 strcpy (tlsname, "*LCM");
8462 strcat (tlsname, name + 3);
8463 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8464 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8465 tocref = create_TOC_reference (modaddr, NULL_RTX);
8466 rtx modmem = gen_const_mem (Pmode, tocref);
8467 set_mem_alias_set (modmem, get_TOC_alias_set ());
8469 rtx modreg = gen_reg_rtx (Pmode);
8470 emit_insn (gen_rtx_SET (modreg, modmem));
8472 tmpreg = gen_reg_rtx (Pmode);
8473 emit_insn (gen_rtx_SET (tmpreg, mem));
/* Dynamic models resolve through __tls_get_addr (module, offset).  */
8475 dest = gen_reg_rtx (Pmode);
8476 if (TARGET_32BIT)
8477 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8478 else
8479 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8480 return dest;
8482 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8483 else if (TARGET_32BIT)
8485 tlsreg = gen_reg_rtx (SImode);
8486 emit_insn (gen_tls_get_tpointer (tlsreg));
8488 else
8489 tlsreg = gen_rtx_REG (DImode, 13);
8491 /* Load the TOC value into temporary register. */
8492 tmpreg = gen_reg_rtx (Pmode);
8493 emit_insn (gen_rtx_SET (tmpreg, mem));
/* Record that the loaded TOC value equals ADDR - thread pointer,
   so later passes can reason about it.  */
8494 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8495 gen_rtx_MINUS (Pmode, addr, tlsreg));
8497 /* Add TOC symbol value to TLS pointer. */
8498 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8500 return dest;
8503 /* Passes the tls arg value for global dynamic and local dynamic
8504 emit_library_call_value in rs6000_legitimize_tls_address to
8505 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
8506 marker relocs put on __tls_get_addr calls. */
/* NOTE(review): set immediately before each emit_library_call_value in
   rs6000_legitimize_tls_address and cleared right after; it is NULL at
   all other times.  */
8507 static rtx global_tlsarg;
8509 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8510 this (thread-local) address. */
/* MODEL selects the TLS access scheme (local-exec, initial-exec,
   global-dynamic, local-dynamic).  Returns a fresh pseudo holding the
   address.  XCOFF targets are handled entirely by the AIX helper.
   NOTE(review): listing retains upstream blob line numbers; brace-only
   lines were elided by the extraction.  */
8512 static rtx
8513 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8515 rtx dest, insn;
8517 if (TARGET_XCOFF)
8518 return rs6000_legitimize_tls_address_aix (addr, model);
8520 dest = gen_reg_rtx (Pmode);
/* Local-exec with a 16-bit (or pc-relative) offset: a single tprel add
   off the thread pointer (r13 on 64-bit, r2 on 32-bit).  */
8521 if (model == TLS_MODEL_LOCAL_EXEC
8522 && (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun)))
8524 rtx tlsreg;
8526 if (TARGET_64BIT)
8528 tlsreg = gen_rtx_REG (Pmode, 13);
8529 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8531 else
8533 tlsreg = gen_rtx_REG (Pmode, 2);
8534 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8536 emit_insn (insn);
/* Local-exec with a 32-bit offset: high-adjusted part then low part.  */
8538 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8540 rtx tlsreg, tmp;
8542 tmp = gen_reg_rtx (Pmode);
8543 if (TARGET_64BIT)
8545 tlsreg = gen_rtx_REG (Pmode, 13);
8546 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8548 else
8550 tlsreg = gen_rtx_REG (Pmode, 2);
8551 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8553 emit_insn (insn);
8554 if (TARGET_64BIT)
8555 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8556 else
8557 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8558 emit_insn (insn);
8560 else
8562 rtx got, tga, tmp1, tmp2;
8564 /* We currently use relocations like @got@tlsgd for tls, which
8565 means the linker will handle allocation of tls entries, placing
8566 them in the .got section. So use a pointer to the .got section,
8567 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8568 or to secondary GOT sections used by 32-bit -fPIC. */
8569 if (rs6000_pcrel_p (cfun))
8570 got = const0_rtx;
8571 else if (TARGET_64BIT)
8572 got = gen_rtx_REG (Pmode, 2);
8573 else
8575 if (flag_pic == 1)
8576 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8577 else
8579 rtx gsym = rs6000_got_sym ();
8580 got = gen_reg_rtx (Pmode);
8581 if (flag_pic == 0)
8582 rs6000_emit_move (got, gsym, Pmode);
8583 else
/* -fPIC without a dedicated GOT register: materialize the GOT
   address via the classic bl/mflr sequence.  */
8585 rtx mem, lab;
8587 tmp1 = gen_reg_rtx (Pmode);
8588 tmp2 = gen_reg_rtx (Pmode);
8589 mem = gen_const_mem (Pmode, tmp1);
8590 lab = gen_label_rtx ();
8591 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8592 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8593 if (TARGET_LINK_STACK)
8594 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8595 emit_move_insn (tmp2, mem);
8596 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8597 set_unique_reg_note (last, REG_EQUAL, gsym);
/* Global-dynamic: call __tls_get_addr with an @got@tlsgd argument.  */
8602 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8604 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
8605 UNSPEC_TLSGD);
8606 tga = rs6000_tls_get_addr ();
8607 rtx argreg = gen_rtx_REG (Pmode, 3);
8608 emit_insn (gen_rtx_SET (argreg, arg));
/* global_tlsarg tells the call expanders to add marker relocs.  */
8609 global_tlsarg = arg;
8610 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
8611 global_tlsarg = NULL_RTX;
8613 /* Make a note so that the result of this call can be CSEd. */
8614 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8615 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8616 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
/* Local-dynamic: one __tls_get_addr call for the module base, then a
   per-symbol dtprel offset added by one of the cases below.  */
8618 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8620 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
8621 tga = rs6000_tls_get_addr ();
8622 tmp1 = gen_reg_rtx (Pmode);
8623 rtx argreg = gen_rtx_REG (Pmode, 3);
8624 emit_insn (gen_rtx_SET (argreg, arg));
8625 global_tlsarg = arg;
8626 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
8627 global_tlsarg = NULL_RTX;
8629 /* Make a note so that the result of this call can be CSEd. */
8630 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8631 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8632 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8634 if (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun))
8636 if (TARGET_64BIT)
8637 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8638 else
8639 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8641 else if (rs6000_tls_size == 32)
8643 tmp2 = gen_reg_rtx (Pmode);
8644 if (TARGET_64BIT)
8645 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8646 else
8647 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8648 emit_insn (insn);
8649 if (TARGET_64BIT)
8650 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8651 else
8652 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8654 else
8656 tmp2 = gen_reg_rtx (Pmode);
8657 if (TARGET_64BIT)
8658 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8659 else
8660 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8661 emit_insn (insn);
8662 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8664 emit_insn (insn);
8666 else
8668 /* IE, or 64-bit offset LE. */
8669 tmp2 = gen_reg_rtx (Pmode);
8670 if (TARGET_64BIT)
8671 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8672 else
8673 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8674 emit_insn (insn);
8675 if (rs6000_pcrel_p (cfun))
8677 if (TARGET_64BIT)
8678 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
8679 else
8680 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
8682 else if (TARGET_64BIT)
8683 insn = gen_tls_tls_64 (dest, tmp2, addr);
8684 else
8685 insn = gen_tls_tls_32 (dest, tmp2, addr);
8686 emit_insn (insn);
8690 return dest;
8693 /* Only create the global variable for the stack protect guard if we are using
8694 the global flavor of that guard. */
8695 static tree
8696 rs6000_init_stack_protect_guard (void)
8698 if (rs6000_stack_protector_guard == SSP_GLOBAL)
8699 return default_stack_protect_guard ();
8701 return NULL_TREE;
8704 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8706 static bool
8707 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8709 if (GET_CODE (x) == HIGH
8710 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8711 return true;
8713 /* A TLS symbol in the TOC cannot contain a sum. */
8714 if (GET_CODE (x) == CONST
8715 && GET_CODE (XEXP (x, 0)) == PLUS
8716 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
8717 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8718 return true;
8720 /* Do not place an ELF TLS symbol in the constant pool. */
8721 return TARGET_ELF && tls_referenced_p (x);
8724 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8725 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8726 can be addressed relative to the toc pointer. */
8728 static bool
8729 use_toc_relative_ref (rtx sym, machine_mode mode)
8731 return ((constant_pool_expr_p (sym)
8732 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8733 get_pool_mode (sym)))
8734 || (TARGET_CMODEL == CMODEL_MEDIUM
8735 && SYMBOL_REF_LOCAL_P (sym)
8736 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8739 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8740 that is a valid memory address for an instruction.
8741 The MODE argument is the machine mode for the MEM expression
8742 that wants to use this address.
8744 On the RS/6000, there are four valid address: a SYMBOL_REF that
8745 refers to a constant pool entry of an address (or the sum of it
8746 plus a constant), a short (16-bit signed) constant plus a register,
8747 the sum of two registers, or a register indirect, possibly with an
8748 auto-increment. For DFmode, DDmode and DImode with a constant plus
8749 register, we must ensure that both words are addressable or PowerPC64
8750 with offset word aligned.
8752 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8753 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8754 because adjacent memory cells are accessed by adding word-sized offsets
8755 during assembly output. */
/* NOTE(review): the checks below are ordered; several later tests are
   deliberately unreachable unless earlier ones declined.  Listing retains
   upstream blob line numbers; brace-only lines elided by the extraction.  */
8756 static bool
8757 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
8759 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8760 bool quad_offset_p = mode_supports_dq_form (mode);
8762 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
8763 if (VECTOR_MEM_ALTIVEC_P (mode)
8764 && GET_CODE (x) == AND
8765 && CONST_INT_P (XEXP (x, 1))
8766 && INTVAL (XEXP (x, 1)) == -16)
8767 x = XEXP (x, 0);
/* ELF TLS symbols must be legitimized first; never valid here.  */
8769 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
8770 return 0;
8771 if (legitimate_indirect_address_p (x, reg_ok_strict))
8772 return 1;
8773 if (TARGET_UPDATE
8774 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
8775 && mode_supports_pre_incdec_p (mode)
8776 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
8777 return 1;
8779 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
8780 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
8781 return 1;
8783 /* Handle restricted vector d-form offsets in ISA 3.0. */
8784 if (quad_offset_p)
8786 if (quad_address_p (x, mode, reg_ok_strict))
8787 return 1;
8789 else if (virtual_stack_registers_memory_p (x))
8790 return 1;
8792 else if (reg_offset_p)
8794 if (legitimate_small_data_p (mode, x))
8795 return 1;
8796 if (legitimate_constant_pool_address_p (x, mode,
8797 reg_ok_strict || lra_in_progress))
8798 return 1;
8801 /* For TImode, if we have TImode in VSX registers, only allow register
8802 indirect addresses. This will allow the values to go in either GPRs
8803 or VSX registers without reloading. The vector types would tend to
8804 go into VSX registers, so we allow REG+REG, while TImode seems
8805 somewhat split, in that some uses are GPR based, and some VSX based. */
8806 /* FIXME: We could loosen this by changing the following to
8807 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
8808 but currently we cannot allow REG+REG addressing for TImode. See
8809 PR72827 for complete details on how this ends up hoodwinking DSE. */
8810 if (mode == TImode && TARGET_VSX)
8811 return 0;
8812 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
8813 if (! reg_ok_strict
8814 && reg_offset_p
8815 && GET_CODE (x) == PLUS
8816 && REG_P (XEXP (x, 0))
8817 && (XEXP (x, 0) == virtual_stack_vars_rtx
8818 || XEXP (x, 0) == arg_pointer_rtx)
8819 && CONST_INT_P (XEXP (x, 1)))
8820 return 1;
8821 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
8822 return 1;
/* Indexed (reg+reg) addressing, excluded for multi-register modes
   per the head comment.  */
8823 if (!FLOAT128_2REG_P (mode)
8824 && (TARGET_HARD_FLOAT
8825 || TARGET_POWERPC64
8826 || (mode != DFmode && mode != DDmode))
8827 && (TARGET_POWERPC64 || mode != DImode)
8828 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
8829 && mode != PTImode
8830 && !avoiding_indexed_address_p (mode)
8831 && legitimate_indexed_address_p (x, reg_ok_strict))
8832 return 1;
8833 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
8834 && mode_supports_pre_modify_p (mode)
8835 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
8836 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
8837 reg_ok_strict, false)
8838 || (!avoiding_indexed_address_p (mode)
8839 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
8840 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8842 /* There is no prefixed version of the load/store with update. */
8843 rtx addr = XEXP (x, 1);
8844 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
8846 if (reg_offset_p && !quad_offset_p
8847 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
8848 return 1;
8849 return 0;
8852 /* Debug version of rs6000_legitimate_address_p. */
8853 static bool
8854 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
8855 bool reg_ok_strict)
8857 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
8858 fprintf (stderr,
8859 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8860 "strict = %d, reload = %s, code = %s\n",
8861 ret ? "true" : "false",
8862 GET_MODE_NAME (mode),
8863 reg_ok_strict,
8864 (reload_completed ? "after" : "before"),
8865 GET_RTX_NAME (GET_CODE (x)));
8866 debug_rtx (x);
8868 return ret;
8871 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
8873 static bool
8874 rs6000_mode_dependent_address_p (const_rtx addr,
8875 addr_space_t as ATTRIBUTE_UNUSED)
8877 return rs6000_mode_dependent_address_ptr (addr);
8880 /* Go to LABEL if ADDR (a legitimate address expression)
8881 has an effect that depends on the machine mode it is used for.
8883 On the RS/6000 this is true of all integral offsets (since AltiVec
8884 and VSX modes don't allow them) or is a pre-increment or decrement.
8886 ??? Except that due to conceptual problems in offsettable_address_p
8887 we can't really report the problems of integral offsets. So leave
8888 this assuming that the adjustable offset must be valid for the
8889 sub-words of a TFmode operand, which is what we had before. */
8891 static bool
8892 rs6000_mode_dependent_address (const_rtx addr)
8894 switch (GET_CODE (addr))
8896 case PLUS:
8897 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8898 is considered a legitimate address before reload, so there
8899 are no offset restrictions in that case. Note that this
8900 condition is safe in strict mode because any address involving
8901 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8902 been rejected as illegitimate. */
8903 if (XEXP (addr, 0) != virtual_stack_vars_rtx
8904 && XEXP (addr, 0) != arg_pointer_rtx
8905 && CONST_INT_P (XEXP (addr, 1)))
8907 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
8908 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
8909 if (TARGET_PREFIXED_ADDR)
8910 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
8911 else
8912 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
8914 break;
8916 case LO_SUM:
8917 /* Anything in the constant pool is sufficiently aligned that
8918 all bytes have the same high part address. */
8919 return !legitimate_constant_pool_address_p (addr, QImode, false);
8921 /* Auto-increment cases are now treated generically in recog.c. */
8922 case PRE_MODIFY:
8923 return TARGET_UPDATE;
8925 /* AND is only allowed in Altivec loads. */
8926 case AND:
8927 return true;
8929 default:
8930 break;
8933 return false;
8936 /* Debug version of rs6000_mode_dependent_address. */
8937 static bool
8938 rs6000_debug_mode_dependent_address (const_rtx addr)
8940 bool ret = rs6000_mode_dependent_address (addr);
8942 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
8943 ret ? "true" : "false");
8944 debug_rtx (addr);
8946 return ret;
8949 /* Implement FIND_BASE_TERM. */
8952 rs6000_find_base_term (rtx op)
8954 rtx base;
8956 base = op;
8957 if (GET_CODE (base) == CONST)
8958 base = XEXP (base, 0);
8959 if (GET_CODE (base) == PLUS)
8960 base = XEXP (base, 0);
8961 if (GET_CODE (base) == UNSPEC)
8962 switch (XINT (base, 1))
8964 case UNSPEC_TOCREL:
8965 case UNSPEC_MACHOPIC_OFFSET:
8966 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
8967 for aliasing purposes. */
8968 return XVECEXP (base, 0, 0);
8971 return op;
8974 /* More elaborate version of recog's offsettable_memref_p predicate
8975 that works around the ??? note of rs6000_mode_dependent_address.
8976 In particular it accepts
8978 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
8980 in 32-bit mode, that the recog predicate rejects. */
8982 static bool
8983 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
8985 bool worst_case;
8987 if (!MEM_P (op))
8988 return false;
8990 /* First mimic offsettable_memref_p. */
8991 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
8992 return true;
8994 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
8995 the latter predicate knows nothing about the mode of the memory
8996 reference and, therefore, assumes that it is the largest supported
8997 mode (TFmode). As a consequence, legitimate offsettable memory
8998 references are rejected. rs6000_legitimate_offset_address_p contains
8999 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9000 at least with a little bit of help here given that we know the
9001 actual registers used. */
9002 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9003 || GET_MODE_SIZE (reg_mode) == 4);
9004 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9005 strict, worst_case);
9008 /* Determine the reassociation width to be used in reassociate_bb.
9009 This takes into account how many parallel operations we
9010 can actually do of a given type, and also the latency.
9012 int add/sub 6/cycle
9013 mul 2/cycle
9014 vect add/sub/mul 2/cycle
9015 fp add/sub/mul 2/cycle
9016 dfp 1/cycle
9019 static int
9020 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9021 machine_mode mode)
9023 switch (rs6000_tune)
9025 case PROCESSOR_POWER8:
9026 case PROCESSOR_POWER9:
9027 case PROCESSOR_FUTURE:
9028 if (DECIMAL_FLOAT_MODE_P (mode))
9029 return 1;
9030 if (VECTOR_MODE_P (mode))
9031 return 4;
9032 if (INTEGRAL_MODE_P (mode))
9033 return 1;
9034 if (FLOAT_MODE_P (mode))
9035 return 4;
9036 break;
9037 default:
9038 break;
9040 return 1;
9043 /* Change register usage conditional on target flags. */
9044 static void
9045 rs6000_conditional_register_usage (void)
9047 int i;
9049 if (TARGET_DEBUG_TARGET)
9050 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9052 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9053 if (TARGET_64BIT)
9054 fixed_regs[13] = call_used_regs[13] = 1;
9056 /* Conditionally disable FPRs. */
9057 if (TARGET_SOFT_FLOAT)
9058 for (i = 32; i < 64; i++)
9059 fixed_regs[i] = call_used_regs[i] = 1;
9061 /* The TOC register is not killed across calls in a way that is
9062 visible to the compiler. */
9063 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9064 call_used_regs[2] = 0;
9066 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9067 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9069 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9070 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9071 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9073 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9074 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9075 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9077 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9078 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9080 if (!TARGET_ALTIVEC && !TARGET_VSX)
9082 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9083 fixed_regs[i] = call_used_regs[i] = 1;
9084 call_used_regs[VRSAVE_REGNO] = 1;
9087 if (TARGET_ALTIVEC || TARGET_VSX)
9088 global_regs[VSCR_REGNO] = 1;
9090 if (TARGET_ALTIVEC_ABI)
9092 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9093 call_used_regs[i] = 1;
9095 /* AIX reserves VR20:31 in non-extended ABI mode. */
9096 if (TARGET_XCOFF)
9097 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9098 fixed_regs[i] = call_used_regs[i] = 1;
9103 /* Output insns to set DEST equal to the constant SOURCE as a series of
9104 lis, ori and shl instructions and return TRUE. */
9106 bool
9107 rs6000_emit_set_const (rtx dest, rtx source)
9109 machine_mode mode = GET_MODE (dest);
9110 rtx temp, set;
9111 rtx_insn *insn;
9112 HOST_WIDE_INT c;
9114 gcc_checking_assert (CONST_INT_P (source));
9115 c = INTVAL (source);
9116 switch (mode)
9118 case E_QImode:
9119 case E_HImode:
9120 emit_insn (gen_rtx_SET (dest, source));
9121 return true;
9123 case E_SImode:
9124 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9126 emit_insn (gen_rtx_SET (copy_rtx (temp),
9127 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9128 emit_insn (gen_rtx_SET (dest,
9129 gen_rtx_IOR (SImode, copy_rtx (temp),
9130 GEN_INT (c & 0xffff))));
9131 break;
9133 case E_DImode:
9134 if (!TARGET_POWERPC64)
9136 rtx hi, lo;
9138 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9139 DImode);
9140 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9141 DImode);
9142 emit_move_insn (hi, GEN_INT (c >> 32));
9143 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9144 emit_move_insn (lo, GEN_INT (c));
9146 else
9147 rs6000_emit_set_long_const (dest, c);
9148 break;
9150 default:
9151 gcc_unreachable ();
9154 insn = get_last_insn ();
9155 set = single_set (insn);
9156 if (! CONSTANT_P (SET_SRC (set)))
9157 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9159 return true;
9162 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9163 Output insns to set DEST equal to the constant C as a series of
9164 lis, ori and shl instructions. */
/* The 64-bit constant is split into four 16-bit chunks ud1 (lowest)
   through ud4 (highest); each branch below handles the patterns that
   can be materialized in progressively more instructions.
   NOTE(review): listing retains upstream blob line numbers; brace-only
   lines were elided by the extraction.  */
9166 static void
9167 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9169 rtx temp;
9170 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9172 ud1 = c & 0xffff;
9173 c = c >> 16;
9174 ud2 = c & 0xffff;
9175 c = c >> 16;
9176 ud3 = c & 0xffff;
9177 c = c >> 16;
9178 ud4 = c & 0xffff;
/* Fits in a sign-extended 16-bit value: one li.  */
9180 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9181 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9182 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
/* Fits in a sign-extended 32-bit value: lis + optional ori.  */
9184 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9185 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9187 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9189 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9190 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9191 if (ud1 != 0)
9192 emit_move_insn (dest,
9193 gen_rtx_IOR (DImode, copy_rtx (temp),
9194 GEN_INT (ud1)));
/* Upper 32 bits zero but bit 31 set: build the low word, then
   zero-extend to clear the sign-extension of lis.  */
9196 else if (ud3 == 0 && ud4 == 0)
9198 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9200 gcc_assert (ud2 & 0x8000);
9201 emit_move_insn (copy_rtx (temp),
9202 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9203 if (ud1 != 0)
9204 emit_move_insn (copy_rtx (temp),
9205 gen_rtx_IOR (DImode, copy_rtx (temp),
9206 GEN_INT (ud1)));
9207 emit_move_insn (dest,
9208 gen_rtx_ZERO_EXTEND (DImode,
9209 gen_lowpart (SImode,
9210 copy_rtx (temp))));
/* Constant representable as a 48-bit value shifted: lis/ori for
   ud3:ud2, shift left 16, then ori in ud1.  */
9212 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9213 || (ud4 == 0 && ! (ud3 & 0x8000)))
9215 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9217 emit_move_insn (copy_rtx (temp),
9218 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9219 if (ud2 != 0)
9220 emit_move_insn (copy_rtx (temp),
9221 gen_rtx_IOR (DImode, copy_rtx (temp),
9222 GEN_INT (ud2)));
9223 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9224 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9225 GEN_INT (16)));
9226 if (ud1 != 0)
9227 emit_move_insn (dest,
9228 gen_rtx_IOR (DImode, copy_rtx (temp),
9229 GEN_INT (ud1)));
/* General case: build ud4:ud3, shift left 32, ori in ud2 and ud1.  */
9231 else
9233 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9235 emit_move_insn (copy_rtx (temp),
9236 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9237 if (ud3 != 0)
9238 emit_move_insn (copy_rtx (temp),
9239 gen_rtx_IOR (DImode, copy_rtx (temp),
9240 GEN_INT (ud3)));
9242 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9243 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9244 GEN_INT (32)));
9245 if (ud2 != 0)
9246 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9247 gen_rtx_IOR (DImode, copy_rtx (temp),
9248 GEN_INT (ud2 << 16)));
9249 if (ud1 != 0)
9250 emit_move_insn (dest,
9251 gen_rtx_IOR (DImode, copy_rtx (temp),
9252 GEN_INT (ud1)));
9256 /* Helper for the following. Get rid of [r+r] memory refs
9257 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
9259 static void
9260 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9262 if (MEM_P (operands[0])
9263 && !REG_P (XEXP (operands[0], 0))
9264 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9265 GET_MODE (operands[0]), false))
9266 operands[0]
9267 = replace_equiv_address (operands[0],
9268 copy_addr_to_reg (XEXP (operands[0], 0)));
9270 if (MEM_P (operands[1])
9271 && !REG_P (XEXP (operands[1], 0))
9272 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9273 GET_MODE (operands[1]), false))
9274 operands[1]
9275 = replace_equiv_address (operands[1],
9276 copy_addr_to_reg (XEXP (operands[1], 0)));
9279 /* Generate a vector of constants to permute MODE for a little-endian
9280 storage operation by swapping the two halves of a vector. */
9281 static rtvec
9282 rs6000_const_vec (machine_mode mode)
9284 int i, subparts;
9285 rtvec v;
9287 switch (mode)
9289 case E_V1TImode:
9290 subparts = 1;
9291 break;
9292 case E_V2DFmode:
9293 case E_V2DImode:
9294 subparts = 2;
9295 break;
9296 case E_V4SFmode:
9297 case E_V4SImode:
9298 subparts = 4;
9299 break;
9300 case E_V8HImode:
9301 subparts = 8;
9302 break;
9303 case E_V16QImode:
9304 subparts = 16;
9305 break;
9306 default:
9307 gcc_unreachable();
9310 v = rtvec_alloc (subparts);
9312 for (i = 0; i < subparts / 2; ++i)
9313 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9314 for (i = subparts / 2; i < subparts; ++i)
9315 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9317 return v;
9320 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9321 store operation. */
9322 void
9323 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9325 /* Scalar permutations are easier to express in integer modes rather than
9326 floating-point modes, so cast them here. We use V1TImode instead
9327 of TImode to ensure that the values don't go through GPRs. */
9328 if (FLOAT128_VECTOR_P (mode))
9330 dest = gen_lowpart (V1TImode, dest);
9331 source = gen_lowpart (V1TImode, source);
9332 mode = V1TImode;
9335 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9336 scalar. */
9337 if (mode == TImode || mode == V1TImode)
9338 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
9339 GEN_INT (64))));
9340 else
9342 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9343 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9347 /* Emit a little-endian load from vector memory location SOURCE to VSX
9348 register DEST in mode MODE. The load is done with two permuting
9349 insn's that represent an lxvd2x and xxpermdi. */
9350 void
9351 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9353 /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
9354 V1TImode). */
9355 if (mode == TImode || mode == V1TImode)
9357 mode = V2DImode;
9358 dest = gen_lowpart (V2DImode, dest);
9359 source = adjust_address (source, V2DImode, 0);
9362 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9363 rs6000_emit_le_vsx_permute (tmp, source, mode);
9364 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9367 /* Emit a little-endian store to vector memory location DEST from VSX
9368 register SOURCE in mode MODE. The store is done with two permuting
9369 insn's that represent an xxpermdi and an stxvd2x. */
9370 void
9371 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9373 /* This should never be called during or after LRA, because it does
9374 not re-permute the source register. It is intended only for use
9375 during expand. */
9376 gcc_assert (!lra_in_progress && !reload_completed);
9378 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9379 V1TImode). */
9380 if (mode == TImode || mode == V1TImode)
9382 mode = V2DImode;
9383 dest = adjust_address (dest, V2DImode, 0);
9384 source = gen_lowpart (V2DImode, source);
9387 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9388 rs6000_emit_le_vsx_permute (tmp, source, mode);
9389 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9392 /* Emit a sequence representing a little-endian VSX load or store,
9393 moving data from SOURCE to DEST in mode MODE. This is done
9394 separately from rs6000_emit_move to ensure it is called only
9395 during expand. LE VSX loads and stores introduced later are
9396 handled with a split. The expand-time RTL generation allows
9397 us to optimize away redundant pairs of register-permutes. */
9398 void
9399 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9401 gcc_assert (!BYTES_BIG_ENDIAN
9402 && VECTOR_MEM_VSX_P (mode)
9403 && !TARGET_P9_VECTOR
9404 && !gpr_or_gpr_p (dest, source)
9405 && (MEM_P (source) ^ MEM_P (dest)));
9407 if (MEM_P (source))
9409 gcc_assert (REG_P (dest) || SUBREG_P (dest));
9410 rs6000_emit_le_vsx_load (dest, source, mode);
9412 else
9414 if (!REG_P (source))
9415 source = force_reg (mode, source);
9416 rs6000_emit_le_vsx_store (dest, source, mode);
9420 /* Return whether a SFmode or SImode move can be done without converting one
9421 mode to another. This arrises when we have:
9423 (SUBREG:SF (REG:SI ...))
9424 (SUBREG:SI (REG:SF ...))
9426 and one of the values is in a floating point/vector register, where SFmode
9427 scalars are stored in DFmode format. */
9429 bool
9430 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
9432 if (TARGET_ALLOW_SF_SUBREG)
9433 return true;
9435 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9436 return true;
9438 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
9439 return true;
9441 /*. Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
9442 if (SUBREG_P (dest))
9444 rtx dest_subreg = SUBREG_REG (dest);
9445 rtx src_subreg = SUBREG_REG (src);
9446 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
9449 return false;
9453 /* Helper function to change moves with:
9455 (SUBREG:SF (REG:SI)) and
9456 (SUBREG:SI (REG:SF))
9458 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
9459 values are stored as DFmode values in the VSX registers. We need to convert
9460 the bits before we can use a direct move or operate on the bits in the
9461 vector register as an integer type.
9463 Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)). */
9465 static bool
9466 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
9468 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
9469 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
9470 && SUBREG_P (source) && sf_subreg_operand (source, mode))
9472 rtx inner_source = SUBREG_REG (source);
9473 machine_mode inner_mode = GET_MODE (inner_source);
9475 if (mode == SImode && inner_mode == SFmode)
9477 emit_insn (gen_movsi_from_sf (dest, inner_source));
9478 return true;
9481 if (mode == SFmode && inner_mode == SImode)
9483 emit_insn (gen_movsf_from_si (dest, inner_source));
9484 return true;
9488 return false;
/* Emit a move from SOURCE to DEST in mode MODE.  This is the main move
   expander for the rs6000 port: it legitimizes TLS references, splits
   unaligned DImode block moves, handles SDmode/DDmode spill oddities
   under LRA, and forces hard constants into the constant pool or the
   TOC before finally emitting a SET.  */
void
rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
{
  rtx operands[2];
  operands[0] = dest;
  operands[1] = source;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
	       "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
	       GET_MODE_NAME (mode),
	       lra_in_progress,
	       reload_completed,
	       can_create_pseudo_p ());
      debug_rtx (dest);
      fprintf (stderr, "source:\n");
      debug_rtx (source);
    }

  /* Check that we get CONST_WIDE_INT only when we should.  */
  if (CONST_WIDE_INT_P (operands[1])
      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
    gcc_unreachable ();

#ifdef HAVE_AS_GNU_ATTRIBUTE
  /* If we use a long double type, set the flags in .gnu_attribute that say
     what the long double type is.  This is to allow the linker's warning
     message for the wrong long double to be useful, even if the function does
     not do a call (for example, doing a 128-bit add on power9 if the long
     double type is IEEE 128-bit).  Do not set this if __ibm128 or __float128
     are used if they aren't the default long double type.  */
  if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
    {
      if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
	rs6000_passes_float = rs6000_passes_long_double = true;

      else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
	rs6000_passes_float = rs6000_passes_long_double = true;
    }
#endif

  /* See if we need to special case SImode/SFmode SUBREG moves.  */
  if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
      && rs6000_emit_move_si_sf_subreg (dest, source, mode))
    return;

  /* Check if GCC is setting up a block move that will end up using FP
     registers as temporaries.  We must make sure this is acceptable.
     If the DImode access would be slow but two SImode accesses are not,
     split the move into two word-sized moves.  */
  if (MEM_P (operands[0])
      && MEM_P (operands[1])
      && mode == DImode
      && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
	  || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
      && ! (rs6000_slow_unaligned_access (SImode,
					  (MEM_ALIGN (operands[0]) > 32
					   ? 32 : MEM_ALIGN (operands[0])))
	    || rs6000_slow_unaligned_access (SImode,
					     (MEM_ALIGN (operands[1]) > 32
					      ? 32 : MEM_ALIGN (operands[1]))))
      && ! MEM_VOLATILE_P (operands [0])
      && ! MEM_VOLATILE_P (operands [1]))
    {
      emit_move_insn (adjust_address (operands[0], SImode, 0),
		      adjust_address (operands[1], SImode, 0));
      emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
		      adjust_address (copy_rtx (operands[1]), SImode, 4));
      return;
    }

  if (can_create_pseudo_p () && MEM_P (operands[0])
      && !gpc_reg_operand (operands[1], mode))
    operands[1] = force_reg (mode, operands[1]);

  /* Recognize the case where operand[1] is a reference to thread-local
     data and load its address to a register.  */
  if (tls_referenced_p (operands[1]))
    {
      enum tls_model model;
      rtx tmp = operands[1];
      rtx addend = NULL;

      /* Peel a (const (plus SYM ADDEND)) wrapper; the addend is
	 re-applied after the TLS address is legitimized.  */
      if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
	{
	  addend = XEXP (XEXP (tmp, 0), 1);
	  tmp = XEXP (XEXP (tmp, 0), 0);
	}

      gcc_assert (SYMBOL_REF_P (tmp));
      model = SYMBOL_REF_TLS_MODEL (tmp);
      gcc_assert (model != 0);

      tmp = rs6000_legitimize_tls_address (tmp, model);
      if (addend)
	{
	  tmp = gen_rtx_PLUS (mode, tmp, addend);
	  tmp = force_operand (tmp, operands[0]);
	}
      operands[1] = tmp;
    }

  /* 128-bit constant floating-point values on Darwin should really be loaded
     as two parts.  However, this premature splitting is a problem when DFmode
     values can go into Altivec registers.  */
  if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
      && !reg_addr[DFmode].scalar_in_vmx_p)
    {
      rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
			simplify_gen_subreg (DFmode, operands[1], mode, 0),
			DFmode);
      rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
					     GET_MODE_SIZE (DFmode)),
			simplify_gen_subreg (DFmode, operands[1], mode,
					     GET_MODE_SIZE (DFmode)),
			DFmode);
      return;
    }

  /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
     p1:SD) if p1 is not of floating point class and p0 is spilled as
     we can have no analogous movsd_store for this.  */
  if (lra_in_progress && mode == DDmode
      && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
      && reg_preferred_class (REGNO (operands[0])) == NO_REGS
      && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
      && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
    {
      enum reg_class cl;
      int regno = REGNO (SUBREG_REG (operands[1]));

      /* For a pseudo, find the hard register it was (or would be)
	 assigned, to decide whether the FP path applies.  */
      if (!HARD_REGISTER_NUM_P (regno))
	{
	  cl = reg_preferred_class (regno);
	  regno = reg_renumber[regno];
	  if (regno < 0)
	    regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
	}
      if (regno >= 0 && ! FP_REGNO_P (regno))
	{
	  mode = SDmode;
	  operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
	  operands[1] = SUBREG_REG (operands[1]);
	}
    }
  if (lra_in_progress
      && mode == SDmode
      && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
      && reg_preferred_class (REGNO (operands[0])) == NO_REGS
      && (REG_P (operands[1])
	  || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
    {
      int regno = reg_or_subregno (operands[1]);
      enum reg_class cl;

      if (!HARD_REGISTER_NUM_P (regno))
	{
	  cl = reg_preferred_class (regno);
	  gcc_assert (cl != NO_REGS);
	  regno = reg_renumber[regno];
	  if (regno < 0)
	    regno = ira_class_hard_regs[cl][0];
	}
      if (FP_REGNO_P (regno))
	{
	  /* SDmode values live in FP registers as DDmode; store via the
	     special movsd_store pattern.  */
	  if (GET_MODE (operands[0]) != DDmode)
	    operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
	  emit_insn (gen_movsd_store (operands[0], operands[1]));
	}
      else if (INT_REGNO_P (regno))
	emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
      else
	gcc_unreachable();
      return;
    }
  /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
     p:DD)) if p0 is not of floating point class and p1 is spilled as
     we can have no analogous movsd_load for this.  */
  if (lra_in_progress && mode == DDmode
      && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
      && GET_MODE (SUBREG_REG (operands[0])) == SDmode
      && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
      && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
    {
      enum reg_class cl;
      int regno = REGNO (SUBREG_REG (operands[0]));

      if (!HARD_REGISTER_NUM_P (regno))
	{
	  cl = reg_preferred_class (regno);
	  regno = reg_renumber[regno];
	  if (regno < 0)
	    regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
	}
      if (regno >= 0 && ! FP_REGNO_P (regno))
	{
	  mode = SDmode;
	  operands[0] = SUBREG_REG (operands[0]);
	  operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
	}
    }
  if (lra_in_progress
      && mode == SDmode
      && (REG_P (operands[0])
	  || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
      && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
      && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
    {
      int regno = reg_or_subregno (operands[0]);
      enum reg_class cl;

      if (!HARD_REGISTER_NUM_P (regno))
	{
	  cl = reg_preferred_class (regno);
	  gcc_assert (cl != NO_REGS);
	  regno = reg_renumber[regno];
	  if (regno < 0)
	    regno = ira_class_hard_regs[cl][0];
	}
      if (FP_REGNO_P (regno))
	{
	  /* SDmode values live in FP registers as DDmode; load via the
	     special movsd_load pattern.  */
	  if (GET_MODE (operands[1]) != DDmode)
	    operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
	  emit_insn (gen_movsd_load (operands[0], operands[1]));
	}
      else if (INT_REGNO_P (regno))
	emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
      else
	gcc_unreachable();
      return;
    }

  /* FIXME:  In the long term, this switch statement should go away
     and be replaced by a sequence of tests based on things like
     mode == Pmode.  */
  switch (mode)
    {
    case E_HImode:
    case E_QImode:
      if (CONSTANT_P (operands[1])
	  && !CONST_INT_P (operands[1]))
	operands[1] = force_const_mem (mode, operands[1]);
      break;

    case E_TFmode:
    case E_TDmode:
    case E_IFmode:
    case E_KFmode:
      if (FLOAT128_2REG_P (mode))
	rs6000_eliminate_indexed_memrefs (operands);
      /* fall through */

    case E_DFmode:
    case E_DDmode:
    case E_SFmode:
    case E_SDmode:
      if (CONSTANT_P (operands[1])
	  && ! easy_fp_constant (operands[1], mode))
	operands[1] = force_const_mem (mode, operands[1]);
      break;

    case E_V16QImode:
    case E_V8HImode:
    case E_V4SFmode:
    case E_V4SImode:
    case E_V2DFmode:
    case E_V2DImode:
    case E_V1TImode:
      if (CONSTANT_P (operands[1])
	  && !easy_vector_constant (operands[1], mode))
	operands[1] = force_const_mem (mode, operands[1]);
      break;

    case E_SImode:
    case E_DImode:
      /* Use default pattern for address of ELF small data */
      if (TARGET_ELF
	  && mode == Pmode
	  && DEFAULT_ABI == ABI_V4
	  && (SYMBOL_REF_P (operands[1])
	      || GET_CODE (operands[1]) == CONST)
	  && small_data_operand (operands[1], mode))
	{
	  emit_insn (gen_rtx_SET (operands[0], operands[1]));
	  return;
	}

      /* Use the default pattern for loading up PC-relative addresses.  */
      if (TARGET_PCREL && mode == Pmode
	  && pcrel_local_or_external_address (operands[1], Pmode))
	{
	  emit_insn (gen_rtx_SET (operands[0], operands[1]));
	  return;
	}

      /* mode == Pmode && mode == SImode implies a 32-bit target.  */
      if (DEFAULT_ABI == ABI_V4
	  && mode == Pmode && mode == SImode
	  && flag_pic == 1 && got_operand (operands[1], mode))
	{
	  emit_insn (gen_movsi_got (operands[0], operands[1]));
	  return;
	}

      if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
	  && TARGET_NO_TOC_OR_PCREL
	  && ! flag_pic
	  && mode == Pmode
	  && CONSTANT_P (operands[1])
	  && GET_CODE (operands[1]) != HIGH
	  && !CONST_INT_P (operands[1]))
	{
	  rtx target = (!can_create_pseudo_p ()
			? operands[0]
			: gen_reg_rtx (mode));

	  /* If this is a function address on -mcall-aixdesc,
	     convert it to the address of the descriptor.  */
	  if (DEFAULT_ABI == ABI_AIX
	      && SYMBOL_REF_P (operands[1])
	      && XSTR (operands[1], 0)[0] == '.')
	    {
	      const char *name = XSTR (operands[1], 0);
	      rtx new_ref;
	      while (*name == '.')
		name++;
	      new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
	      CONSTANT_POOL_ADDRESS_P (new_ref)
		= CONSTANT_POOL_ADDRESS_P (operands[1]);
	      SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
	      SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
	      SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
	      operands[1] = new_ref;
	    }

	  if (DEFAULT_ABI == ABI_DARWIN)
	    {
#if TARGET_MACHO
	      /* This is not PIC code, but could require the subset of
		 indirections used by mdynamic-no-pic.  */
	      if (MACHO_DYNAMIC_NO_PIC_P)
		{
		  /* Take care of any required data indirection.  */
		  operands[1] = rs6000_machopic_legitimize_pic_address (
				  operands[1], mode, operands[0]);
		  if (operands[0] != operands[1])
		    emit_insn (gen_rtx_SET (operands[0], operands[1]));
		  return;
		}
#endif
	      emit_insn (gen_macho_high (Pmode, target, operands[1]));
	      emit_insn (gen_macho_low (Pmode, operands[0],
					target, operands[1]));
	      return;
	    }

	  /* No TOC: materialize the constant with a HIGH/LO_SUM pair.  */
	  emit_insn (gen_elf_high (target, operands[1]));
	  emit_insn (gen_elf_low (operands[0], target, operands[1]));
	  return;
	}

      /* If this is a SYMBOL_REF that refers to a constant pool entry,
	 and we have put it in the TOC, we just need to make a TOC-relative
	 reference to it.  */
      if (TARGET_TOC
	  && SYMBOL_REF_P (operands[1])
	  && use_toc_relative_ref (operands[1], mode))
	operands[1] = create_TOC_reference (operands[1], operands[0]);
      else if (mode == Pmode
	       && CONSTANT_P (operands[1])
	       && GET_CODE (operands[1]) != HIGH
	       && ((REG_P (operands[0])
		    && FP_REGNO_P (REGNO (operands[0])))
		   || !CONST_INT_P (operands[1])
		   || (num_insns_constant (operands[1], mode)
		       > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
	       && !toc_relative_expr_p (operands[1], false, NULL, NULL)
	       && (TARGET_CMODEL == CMODEL_SMALL
		   || can_create_pseudo_p ()
		   || (REG_P (operands[0])
		       && INT_REG_OK_FOR_BASE_P (operands[0], true))))
	{

#if TARGET_MACHO
	  /* Darwin uses a special PIC legitimizer.  */
	  if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
	    {
	      operands[1] =
		rs6000_machopic_legitimize_pic_address (operands[1], mode,
							operands[0]);
	      if (operands[0] != operands[1])
		emit_insn (gen_rtx_SET (operands[0], operands[1]));
	      return;
	    }
#endif

	  /* If we are to limit the number of things we put in the TOC and
	     this is a symbol plus a constant we can add in one insn,
	     just put the symbol in the TOC and add the constant.  */
	  if (GET_CODE (operands[1]) == CONST
	      && TARGET_NO_SUM_IN_TOC
	      && GET_CODE (XEXP (operands[1], 0)) == PLUS
	      && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
	      && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
		  || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
	      && ! side_effects_p (operands[0]))
	    {
	      rtx sym =
		force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
	      rtx other = XEXP (XEXP (operands[1], 0), 1);

	      sym = force_reg (mode, sym);
	      emit_insn (gen_add3_insn (operands[0], sym, other));
	      return;
	    }

	  operands[1] = force_const_mem (mode, operands[1]);

	  if (TARGET_TOC
	      && SYMBOL_REF_P (XEXP (operands[1], 0))
	      && use_toc_relative_ref (XEXP (operands[1], 0), mode))
	    {
	      rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
						 operands[0]);
	      operands[1] = gen_const_mem (mode, tocref);
	      set_mem_alias_set (operands[1], get_TOC_alias_set ());
	    }
	}
      break;

    case E_TImode:
      if (!VECTOR_MEM_VSX_P (TImode))
	rs6000_eliminate_indexed_memrefs (operands);
      break;

    case E_PTImode:
      rs6000_eliminate_indexed_memrefs (operands);
      break;

    default:
      fatal_insn ("bad move", gen_rtx_SET (dest, source));
    }

  /* Above, we may have called force_const_mem which may have returned
     an invalid address.  If we can, fix this up; otherwise, reload will
     have to deal with it.  */
  if (MEM_P (operands[1]))
    operands[1] = validize_mem (operands[1]);

  emit_insn (gen_rtx_SET (operands[0], operands[1]));
}
/* Set up AIX/Darwin/64-bit Linux quad floating point routines for MODE,
   the IBM extended-double (double-double) 128-bit format.  Registers
   the __gcc_q* (or, with -mxl-compat, _xlq*) libfuncs, plus conversion
   helpers.  */
static void
init_float128_ibm (machine_mode mode)
{
  if (!TARGET_XL_COMPAT)
    {
      set_optab_libfunc (add_optab, mode, "__gcc_qadd");
      set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
      set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
      set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");

      /* Soft-float targets additionally need libfuncs for negation,
	 comparisons, and the basic conversions.  */
      if (!TARGET_HARD_FLOAT)
	{
	  set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
	  set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
	  set_optab_libfunc (ne_optab, mode, "__gcc_qne");
	  set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
	  set_optab_libfunc (ge_optab, mode, "__gcc_qge");
	  set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
	  set_optab_libfunc (le_optab, mode, "__gcc_qle");
	  set_optab_libfunc (unord_optab, mode, "__gcc_qunord");

	  set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
	  set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
	  set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
	  set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
	  set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
	  set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
	  set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
	  set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
	}
    }
  else
    {
      /* -mxl-compat: use the IBM XL compiler's runtime names.  */
      set_optab_libfunc (add_optab, mode, "_xlqadd");
      set_optab_libfunc (sub_optab, mode, "_xlqsub");
      set_optab_libfunc (smul_optab, mode, "_xlqmul");
      set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
    }

  /* Add various conversions for IFmode to use the traditional TFmode
     names.  */
  if (mode == IFmode)
    {
      set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
      set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
      set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
      set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
      set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
      set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");

      /* 128-bit integer conversions are only available on 64-bit.  */
      if (TARGET_POWERPC64)
	{
	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
	}
    }
}
10005 /* Create a decl for either complex long double multiply or complex long double
10006 divide when long double is IEEE 128-bit floating point. We can't use
10007 __multc3 and __divtc3 because the original long double using IBM extended
10008 double used those names. The complex multiply/divide functions are encoded
10009 as builtin functions with a complex result and 4 scalar inputs. */
10011 static void
10012 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
10014 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
10015 name, NULL_TREE);
10017 set_builtin_decl (fncode, fndecl, true);
10019 if (TARGET_DEBUG_BUILTIN)
10020 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
10022 return;
/* Set up IEEE 128-bit floating point routines.  Use different names if the
   arguments can be passed in a vector register.  The historical PowerPC
   implementation of IEEE 128-bit floating point used _q_<op> for the names,
   so continue to use that if we aren't using vector registers to pass IEEE
   128-bit floating point.  */

static void
init_float128_ieee (machine_mode mode)
{
  if (FLOAT128_VECTOR_P (mode))
    {
      /* Guard so the complex mul/div builtins are created only once,
	 even if this is re-run for clone/target attributes.  */
      static bool complex_muldiv_init_p = false;

      /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble.  If
	 we have clone or target attributes, this will be called a second
	 time.  We want to create the built-in function only once.  */
      if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
	{
	  complex_muldiv_init_p = true;
	  /* Derive the builtin codes for TCmode complex multiply/divide
	     from the generic complex-mode builtin ranges.  */
	  built_in_function fncode_mul =
	    (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
				 - MIN_MODE_COMPLEX_FLOAT);
	  built_in_function fncode_div =
	    (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
				 - MIN_MODE_COMPLEX_FLOAT);

	  /* complex long double (*) (long double, long double,
	     long double, long double).  */
	  tree fntype = build_function_type_list (complex_long_double_type_node,
						  long_double_type_node,
						  long_double_type_node,
						  long_double_type_node,
						  long_double_type_node,
						  NULL_TREE);

	  create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
	  create_complex_muldiv ("__divkc3", fncode_div, fntype);
	}

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, mode, "__addkf3");
      set_optab_libfunc (sub_optab, mode, "__subkf3");
      set_optab_libfunc (neg_optab, mode, "__negkf2");
      set_optab_libfunc (smul_optab, mode, "__mulkf3");
      set_optab_libfunc (sdiv_optab, mode, "__divkf3");
      set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
      set_optab_libfunc (abs_optab, mode, "__abskf2");
      set_optab_libfunc (powi_optab, mode, "__powikf2");

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, mode, "__eqkf2");
      set_optab_libfunc (ne_optab, mode, "__nekf2");
      set_optab_libfunc (gt_optab, mode, "__gtkf2");
      set_optab_libfunc (ge_optab, mode, "__gekf2");
      set_optab_libfunc (lt_optab, mode, "__ltkf2");
      set_optab_libfunc (le_optab, mode, "__lekf2");
      set_optab_libfunc (unord_optab, mode, "__unordkf2");

      /* Binary float conversions.  */
      set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
      set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
      set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
      set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");

      /* IBM double-double <-> IEEE 128-bit.  Note these are registered
	 under sext/trunc optabs even though neither direction is a true
	 widening/narrowing between the two 128-bit formats.  */
      set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
      if (mode != TFmode && FLOAT128_IBM_P (TFmode))
	set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");

      set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
      if (mode != TFmode && FLOAT128_IBM_P (TFmode))
	set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");

      /* Decimal float conversions.  */
      set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
      set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
      set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
      set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
      set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
      set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");

      /* Integer conversions.  */
      set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
      set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
      set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
      set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");

      set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
      set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
      set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
      set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");

      /* 128-bit integer conversions are only available on 64-bit.  */
      if (TARGET_POWERPC64)
	{
	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
	}
    }
  else
    {
      /* Historical 32-bit SVR4 names (_q_<op>).  */
      set_optab_libfunc (add_optab, mode, "_q_add");
      set_optab_libfunc (sub_optab, mode, "_q_sub");
      set_optab_libfunc (neg_optab, mode, "_q_neg");
      set_optab_libfunc (smul_optab, mode, "_q_mul");
      set_optab_libfunc (sdiv_optab, mode, "_q_div");
      if (TARGET_PPC_GPOPT)
	set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");

      set_optab_libfunc (eq_optab, mode, "_q_feq");
      set_optab_libfunc (ne_optab, mode, "_q_fne");
      set_optab_libfunc (gt_optab, mode, "_q_fgt");
      set_optab_libfunc (ge_optab, mode, "_q_fge");
      set_optab_libfunc (lt_optab, mode, "_q_flt");
      set_optab_libfunc (le_optab, mode, "_q_fle");

      set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
      set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
      set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
      set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
      set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
      set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
      set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
      set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
    }
}
10146 static void
10147 rs6000_init_libfuncs (void)
10149 /* __float128 support. */
10150 if (TARGET_FLOAT128_TYPE)
10152 init_float128_ibm (IFmode);
10153 init_float128_ieee (KFmode);
10156 /* AIX/Darwin/64-bit Linux quad floating point routines. */
10157 if (TARGET_LONG_DOUBLE_128)
10159 if (!TARGET_IEEEQUAD)
10160 init_float128_ibm (TFmode);
10162 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
10163 else
10164 init_float128_ieee (TFmode);
10168 /* Emit a potentially record-form instruction, setting DST from SRC.
10169 If DOT is 0, that is all; otherwise, set CCREG to the result of the
10170 signed comparison of DST with zero. If DOT is 1, the generated RTL
10171 doesn't care about the DST result; if DOT is 2, it does. If CCREG
10172 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
10173 a separate COMPARE. */
10175 void
10176 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
10178 if (dot == 0)
10180 emit_move_insn (dst, src);
10181 return;
10184 if (cc_reg_not_cr0_operand (ccreg, CCmode))
10186 emit_move_insn (dst, src);
10187 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
10188 return;
10191 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
10192 if (dot == 1)
10194 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
10195 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
10197 else
10199 rtx set = gen_rtx_SET (dst, src);
10200 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
10205 /* A validation routine: say whether CODE, a condition code, and MODE
10206 match. The other alternatives either don't make sense or should
10207 never be generated. */
10209 void
10210 validate_condition_mode (enum rtx_code code, machine_mode mode)
10212 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
10213 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
10214 && GET_MODE_CLASS (mode) == MODE_CC);
10216 /* These don't make sense. */
10217 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
10218 || mode != CCUNSmode);
10220 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
10221 || mode == CCUNSmode);
10223 gcc_assert (mode == CCFPmode
10224 || (code != ORDERED && code != UNORDERED
10225 && code != UNEQ && code != LTGT
10226 && code != UNGT && code != UNLT
10227 && code != UNGE && code != UNLE));
10229 /* These are invalid; the information is not there. */
10230 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
   rldicl, rldicr, or rldic instruction in mode MODE.  If so, if E is
   not zero, store there the bit offset (counted from the right) where
   the single stretch of 1 bits begins; and similarly for B, the bit
   offset where it ends.  A valid mask is a single contiguous run of
   1 bits, possibly wrapping around from the most-significant to the
   least-significant bit.  */

bool
rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
{
  unsigned HOST_WIDE_INT val = INTVAL (mask);
  unsigned HOST_WIDE_INT bit;
  int nb, ne;
  int n = GET_MODE_PRECISION (mode);

  /* Only the rotate-and-mask instruction widths are handled.  */
  if (mode != DImode && mode != SImode)
    return false;

  if (INTVAL (mask) >= 0)
    {
      /* Top bit clear: 0..01..10..0.  BIT is the lowest set bit; if the
	 set bits are contiguous, val + bit is a power of two.  exact_log2
	 returns -1 otherwise, which the range check below rejects.  */
      bit = val & -val;
      ne = exact_log2 (bit);
      nb = exact_log2 (val + bit);
    }
  else if (val + 1 == 0)
    {
      /* All ones: the run spans the whole precision.  */
      nb = n;
      ne = 0;
    }
  else if (val & 1)
    {
      /* Top and bottom bits set: a wrap-around mask 1..10..01..1.
	 Analyze the complemented run of zeros instead.  */
      val = ~val;
      bit = val & -val;
      nb = exact_log2 (bit);
      ne = exact_log2 (val + bit);
    }
  else
    {
      /* Top bit set, bottom bit clear: 1..10..0.  Valid only if no
	 further transitions exist, i.e. val + bit wraps to zero.  */
      bit = val & -val;
      ne = exact_log2 (bit);
      if (val + bit == 0)
	nb = n;
      else
	nb = 0;
    }

  /* NB was computed one past the last bit of the run.  */
  nb--;

  /* Reject non-contiguous masks (exact_log2 gave -1) and runs that do
     not fit in the mode's precision.  */
  if (nb < 0 || ne < 0 || nb >= n || ne >= n)
    return false;

  if (b)
    *b = nb;
  if (e)
    *e = ne;

  return true;
}
10292 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
10293 or rldicr instruction, to implement an AND with it in mode MODE. */
10295 bool
10296 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
10298 int nb, ne;
10300 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10301 return false;
10303 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
10304 does not wrap. */
10305 if (mode == DImode)
10306 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
10308 /* For SImode, rlwinm can do everything. */
10309 if (mode == SImode)
10310 return (nb < 32 && ne < 32);
10312 return false;
10315 /* Return the instruction template for an AND with mask in mode MODE, with
10316 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10318 const char *
10319 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
10321 int nb, ne;
10323 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
10324 gcc_unreachable ();
10326 if (mode == DImode && ne == 0)
10328 operands[3] = GEN_INT (63 - nb);
10329 if (dot)
10330 return "rldicl. %0,%1,0,%3";
10331 return "rldicl %0,%1,0,%3";
10334 if (mode == DImode && nb == 63)
10336 operands[3] = GEN_INT (63 - ne);
10337 if (dot)
10338 return "rldicr. %0,%1,0,%3";
10339 return "rldicr %0,%1,0,%3";
10342 if (nb < 32 && ne < 32)
10344 operands[3] = GEN_INT (31 - nb);
10345 operands[4] = GEN_INT (31 - ne);
10346 if (dot)
10347 return "rlwinm. %0,%1,0,%3,%4";
10348 return "rlwinm %0,%1,0,%3,%4";
10351 gcc_unreachable ();
10354 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
10355 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
10356 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
/* NOTE(review): this listing lost its brace/blank lines during extraction;
   code lines below are kept verbatim and only comments are added.  */
10358 bool
10359 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
/* NB/NE are the mask's high/low one-bit positions from the right.  */
10361 int nb, ne;
10363 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10364 return false;
/* SH stays -1 when the shift amount is not a compile-time constant.  */
10366 int n = GET_MODE_PRECISION (mode);
10367 int sh = -1;
10369 if (CONST_INT_P (XEXP (shift, 1)))
10371 sh = INTVAL (XEXP (shift, 1));
10372 if (sh < 0 || sh >= n)
10373 return false;
10376 rtx_code code = GET_CODE (shift);
10378 /* Convert any shift by 0 to a rotate, to simplify below code. */
10379 if (sh == 0)
10380 code = ROTATE;
10382 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10383 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10384 code = ASHIFT;
/* A rotate whose mask lies wholly below the rotate distance behaves like
   a logical right shift by N - SH.  */
10385 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10387 code = LSHIFTRT;
10388 sh = n - sh;
10391 /* DImode rotates need rld*. */
10392 if (mode == DImode && code == ROTATE)
10393 return (nb == 63 || ne == 0 || ne == sh)
10395 /* SImode rotates need rlw*. */
10396 if (mode == SImode && code == ROTATE)
10397 return (nb < 32 && ne < 32 && sh < 32);
10399 /* Wrap-around masks are only okay for rotates. */
10400 if (ne > nb)
10401 return false;
10403 /* Variable shifts are only okay for rotates. */
10404 if (sh < 0)
10405 return false;
10407 /* Don't allow ASHIFT if the mask is wrong for that. */
10408 if (code == ASHIFT && ne < sh)
10409 return false;
10411 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
10412 if the mask is wrong for that. */
10413 if (nb < 32 && ne < 32 && sh < 32
10414 && !(code == LSHIFTRT && nb >= 32 - sh))
10415 return true;
10417 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
10418 if the mask is wrong for that. */
10419 if (code == LSHIFTRT)
10420 sh = 64 - sh;
10421 if (nb == 63 || ne == 0 || ne == sh)
10422 return !(code == LSHIFTRT && nb >= sh);
10424 return false;
10427 /* Return the instruction template for a shift with mask in mode MODE, with
10428 operands OPERANDS. If DOT is true, make it a record-form instruction. */
/* NOTE(review): this listing lost its brace/blank lines during extraction;
   code lines below are kept verbatim and only comments are added.
   Operands 2/3/4 are rewritten in place to the instruction's fields.  */
10430 const char *
10431 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
10433 int nb, ne;
10435 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10436 gcc_unreachable ();
/* Mask reaches bit 0: rldicl; a right shift is first converted to the
   equivalent left-rotate count.  */
10438 if (mode == DImode && ne == 0)
10440 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10441 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
10442 operands[3] = GEN_INT (63 - nb);
10443 if (dot)
10444 return "rld%I2cl. %0,%1,%2,%3";
10445 return "rld%I2cl %0,%1,%2,%3";
/* Mask starts at bit 63: rldicr.  */
10448 if (mode == DImode && nb == 63)
10450 operands[3] = GEN_INT (63 - ne);
10451 if (dot)
10452 return "rld%I2cr. %0,%1,%2,%3";
10453 return "rld%I2cr %0,%1,%2,%3";
/* Mask's low edge equals a (non-right-shift) constant amount: rldic.  */
10456 if (mode == DImode
10457 && GET_CODE (operands[4]) != LSHIFTRT
10458 && CONST_INT_P (operands[2])
10459 && ne == INTVAL (operands[2]))
10461 operands[3] = GEN_INT (63 - nb);
10462 if (dot)
10463 return "rld%I2c. %0,%1,%2,%3";
10464 return "rld%I2c %0,%1,%2,%3";
/* Word-sized mask: rlwinm/rlwnm.  */
10467 if (nb < 32 && ne < 32)
10469 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10470 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10471 operands[3] = GEN_INT (31 - nb);
10472 operands[4] = GEN_INT (31 - ne);
10473 /* This insn can also be a 64-bit rotate with mask that really makes
10474 it just a shift right (with mask); the %h below are to adjust for
10475 that situation (shift count is >= 32 in that case). */
10476 if (dot)
10477 return "rlw%I2nm. %0,%1,%h2,%3,%4";
10478 return "rlw%I2nm %0,%1,%h2,%3,%4";
10481 gcc_unreachable ();
10484 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
10485 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
10486 ASHIFT, or LSHIFTRT) in mode MODE. */
/* NOTE(review): this listing lost its brace/blank lines during extraction;
   code lines below are kept verbatim and only comments are added.  */
10488 bool
10489 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
/* NB/NE are the mask's high/low one-bit positions from the right.  */
10491 int nb, ne;
10493 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10494 return false;
10496 int n = GET_MODE_PRECISION (mode);
/* Unlike the plain shift-mask case, the insert shift amount must be a
   CONST_INT; INTVAL is applied unconditionally here.  */
10498 int sh = INTVAL (XEXP (shift, 1));
10499 if (sh < 0 || sh >= n)
10500 return false;
10502 rtx_code code = GET_CODE (shift);
10504 /* Convert any shift by 0 to a rotate, to simplify below code. */
10505 if (sh == 0)
10506 code = ROTATE;
10508 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10509 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10510 code = ASHIFT;
/* A rotate whose mask lies wholly below the rotate distance behaves like
   a logical right shift by N - SH.  */
10511 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10513 code = LSHIFTRT;
10514 sh = n - sh;
10517 /* DImode rotates need rldimi. */
10518 if (mode == DImode && code == ROTATE)
10519 return (ne == sh);
10521 /* SImode rotates need rlwimi. */
10522 if (mode == SImode && code == ROTATE)
10523 return (nb < 32 && ne < 32 && sh < 32);
10525 /* Wrap-around masks are only okay for rotates. */
10526 if (ne > nb)
10527 return false;
10529 /* Don't allow ASHIFT if the mask is wrong for that. */
10530 if (code == ASHIFT && ne < sh)
10531 return false;
10533 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
10534 if the mask is wrong for that. */
10535 if (nb < 32 && ne < 32 && sh < 32
10536 && !(code == LSHIFTRT && nb >= 32 - sh))
10537 return true;
10539 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
10540 if the mask is wrong for that. */
10541 if (code == LSHIFTRT)
10542 sh = 64 - sh;
10543 if (ne == sh)
10544 return !(code == LSHIFTRT && nb >= sh);
10546 return false;
10549 /* Return the instruction template for an insert with mask in mode MODE, with
10550 operands OPERANDS. If DOT is true, make it a record-form instruction. */
/* NOTE(review): this listing lost its brace/blank lines during extraction;
   code lines below are kept verbatim and only comments are added.
   Operands 2/3/4 are rewritten in place to the instruction's fields.  */
10552 const char *
10553 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
10555 int nb, ne;
10557 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10558 gcc_unreachable ();
10560 /* Prefer rldimi because rlwimi is cracked. */
10561 if (TARGET_POWERPC64
10562 && (!dot || mode == DImode)
10563 && GET_CODE (operands[4]) != LSHIFTRT
10564 && ne == INTVAL (operands[2]))
10566 operands[3] = GEN_INT (63 - nb);
10567 if (dot)
10568 return "rldimi. %0,%1,%2,%3";
10569 return "rldimi %0,%1,%2,%3";
/* Word-sized insert: rlwimi; a right shift is first converted to the
   equivalent left-rotate count.  */
10572 if (nb < 32 && ne < 32)
10574 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10575 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10576 operands[3] = GEN_INT (31 - nb);
10577 operands[4] = GEN_INT (31 - ne);
10578 if (dot)
10579 return "rlwimi. %0,%1,%2,%3,%4";
10580 return "rlwimi %0,%1,%2,%3,%4";
10583 gcc_unreachable ();
10586 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
10587 using two machine instructions. */
10589 bool
10590 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
10592 /* There are two kinds of AND we can handle with two insns:
10593 1) those we can do with two rl* insn;
10594 2) ori[s];xori[s].
10596 We do not handle that last case yet. */
10598 /* If there is just one stretch of ones, we can do it. */
10599 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
10600 return true;
10602 /* Otherwise, fill in the lowest "hole"; if we can do the result with
10603 one insn, we can do the whole thing with two. */
10604 unsigned HOST_WIDE_INT val = INTVAL (c);
10605 unsigned HOST_WIDE_INT bit1 = val & -val;
10606 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10607 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10608 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10609 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
10612 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
10613 If EXPAND is true, split rotate-and-mask instructions we generate to
10614 their constituent parts as well (this is used during expand); if DOT
10615 is 1, make the last insn a record-form instruction clobbering the
10616 destination GPR and setting the CC reg (from operands[3]); if 2, set
10617 that GPR as well as the CC reg. */
/* NOTE(review): this listing lost its brace/blank lines during extraction;
   code lines below are kept verbatim and only comments are added.  */
10619 void
10620 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
10622 gcc_assert (!(expand && dot));
10624 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
10626 /* If it is one stretch of ones, it is DImode; shift left, mask, then
10627 shift right. This generates better code than doing the masks without
10628 shifts, or shifting first right and then left. */
10629 int nb, ne;
10630 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
10632 gcc_assert (mode == DImode);
10634 int shift = 63 - nb;
10635 if (expand)
10637 rtx tmp1 = gen_reg_rtx (DImode);
10638 rtx tmp2 = gen_reg_rtx (DImode);
10639 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
10640 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
10641 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
10643 else
10645 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
10646 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
10647 emit_move_insn (operands[0], tmp);
10648 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
10649 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10651 return;
10654 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
10655 that does the rest. */
/* Same hole-finding bit tricks as rs6000_is_valid_2insn_and: bit1 is the
   lowest set bit, bit2/bit3 delimit the lowest run of zeros above it.  */
10656 unsigned HOST_WIDE_INT bit1 = val & -val;
10657 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10658 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10659 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10661 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
10662 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
10664 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
10666 /* Two "no-rotate"-and-mask instructions, for SImode. */
10667 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
10669 gcc_assert (mode == SImode);
10671 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10672 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
10673 emit_move_insn (reg, tmp);
10674 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10675 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10676 return;
10679 gcc_assert (mode == DImode);
10681 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
10682 insns; we have to do the first in SImode, because it wraps. */
10683 if (mask2 <= 0xffffffff
10684 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
10686 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10687 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
10688 GEN_INT (mask1));
10689 rtx reg_low = gen_lowpart (SImode, reg);
10690 emit_move_insn (reg_low, tmp);
10691 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10692 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10693 return;
10696 /* Two rld* insns: rotate, clear the hole in the middle (which now is
10697 at the top end), rotate back and clear the other hole. */
10698 int right = exact_log2 (bit3);
10699 int left = 64 - right;
10701 /* Rotate the mask too. */
10702 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
10704 if (expand)
10706 rtx tmp1 = gen_reg_rtx (DImode);
10707 rtx tmp2 = gen_reg_rtx (DImode);
10708 rtx tmp3 = gen_reg_rtx (DImode);
10709 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
10710 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
10711 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
10712 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
10714 else
10716 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
10717 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
10718 emit_move_insn (operands[0], tmp);
10719 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
10720 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
10721 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10725 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
10726 for lfq and stfq insns iff the registers are hard registers. */
10729 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
10731 /* We might have been passed a SUBREG. */
10732 if (!REG_P (reg1) || !REG_P (reg2))
10733 return 0;
10735 /* We might have been passed non floating point registers. */
10736 if (!FP_REGNO_P (REGNO (reg1))
10737 || !FP_REGNO_P (REGNO (reg2)))
10738 return 0;
10740 return (REGNO (reg1) == REGNO (reg2) - 1);
10743 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
10744 addr1 and addr2 must be in consecutive memory locations
10745 (addr2 == addr1 + 8). */
10748 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
10750 rtx addr1, addr2;
10751 unsigned int reg1, reg2;
10752 int offset1, offset2;
10754 /* The mems cannot be volatile. */
10755 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
10756 return 0;
10758 addr1 = XEXP (mem1, 0);
10759 addr2 = XEXP (mem2, 0);
10761 /* Extract an offset (if used) from the first addr. */
10762 if (GET_CODE (addr1) == PLUS)
10764 /* If not a REG, return zero. */
10765 if (!REG_P (XEXP (addr1, 0)))
10766 return 0;
10767 else
10769 reg1 = REGNO (XEXP (addr1, 0));
10770 /* The offset must be constant! */
10771 if (!CONST_INT_P (XEXP (addr1, 1)))
10772 return 0;
10773 offset1 = INTVAL (XEXP (addr1, 1));
10776 else if (!REG_P (addr1))
10777 return 0;
10778 else
10780 reg1 = REGNO (addr1);
10781 /* This was a simple (mem (reg)) expression. Offset is 0. */
10782 offset1 = 0;
10785 /* And now for the second addr. */
10786 if (GET_CODE (addr2) == PLUS)
10788 /* If not a REG, return zero. */
10789 if (!REG_P (XEXP (addr2, 0)))
10790 return 0;
10791 else
10793 reg2 = REGNO (XEXP (addr2, 0));
10794 /* The offset must be constant. */
10795 if (!CONST_INT_P (XEXP (addr2, 1)))
10796 return 0;
10797 offset2 = INTVAL (XEXP (addr2, 1));
10800 else if (!REG_P (addr2))
10801 return 0;
10802 else
10804 reg2 = REGNO (addr2);
10805 /* This was a simple (mem (reg)) expression. Offset is 0. */
10806 offset2 = 0;
10809 /* Both of these must have the same base register. */
10810 if (reg1 != reg2)
10811 return 0;
10813 /* The offset for the second addr must be 8 more than the first addr. */
10814 if (offset2 != offset1 + 8)
10815 return 0;
10817 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
10818 instructions. */
10819 return 1;
10822 /* Implement TARGET_SECONDARY_RELOAD_NEEDED_MODE. For SDmode values we
10823 need to use DDmode, in all other cases we can use the same mode. */
10824 static machine_mode
10825 rs6000_secondary_memory_needed_mode (machine_mode mode)
10827 if (lra_in_progress && mode == SDmode)
10828 return DDmode;
10829 return mode;
10832 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
10833 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
10834 only work on the traditional altivec registers, note if an altivec register
10835 was chosen. */
10837 static enum rs6000_reg_type
10838 register_to_reg_type (rtx reg, bool *is_altivec)
10840 HOST_WIDE_INT regno;
10841 enum reg_class rclass;
10843 if (SUBREG_P (reg))
10844 reg = SUBREG_REG (reg);
10846 if (!REG_P (reg))
10847 return NO_REG_TYPE;
10849 regno = REGNO (reg);
10850 if (!HARD_REGISTER_NUM_P (regno))
10852 if (!lra_in_progress && !reload_completed)
10853 return PSEUDO_REG_TYPE;
10855 regno = true_regnum (reg);
10856 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
10857 return PSEUDO_REG_TYPE;
10860 gcc_assert (regno >= 0);
10862 if (is_altivec && ALTIVEC_REGNO_P (regno))
10863 *is_altivec = true;
10865 rclass = rs6000_regno_regclass[regno];
10866 return reg_class_to_reg_type[(int)rclass];
10869 /* Helper function to return the cost of adding a TOC entry address. */
10871 static inline int
10872 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
10874 int ret;
10876 if (TARGET_CMODEL != CMODEL_SMALL)
10877 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
10879 else
10880 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
10882 return ret;
10885 /* Helper function for rs6000_secondary_reload to determine whether the memory
10886 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
10887 needs reloading. Return negative if the memory is not handled by the memory
10888 helper functions and to try a different reload method, 0 if no additional
10889 instructions are need, and positive to give the extra cost for the
10890 memory. */
/* NOTE(review): this listing lost its brace/blank lines during extraction;
   code lines below are kept verbatim and only comments are added.  */
10892 static int
10893 rs6000_secondary_reload_memory (rtx addr,
10894 enum reg_class rclass,
10895 machine_mode mode)
10897 int extra_cost = 0;
10898 rtx reg, and_arg, plus_arg0, plus_arg1;
10899 addr_mask_type addr_mask;
/* TYPE names the reload action (for debug output); FAIL_MSG, when set,
   marks an address we refuse to handle here (EXTRA_COST goes to -1).  */
10900 const char *type = NULL;
10901 const char *fail_msg = NULL;
/* Pick the addressing-capability mask appropriate to RCLASS.  */
10903 if (GPR_REG_CLASS_P (rclass))
10904 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
10906 else if (rclass == FLOAT_REGS)
10907 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
10909 else if (rclass == ALTIVEC_REGS)
10910 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
10912 /* For the combined VSX_REGS, turn off Altivec AND -16. */
10913 else if (rclass == VSX_REGS)
10914 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
10915 & ~RELOAD_REG_AND_M16);
10917 /* If the register allocator hasn't made up its mind yet on the register
10918 class to use, settle on defaults to use. */
10919 else if (rclass == NO_REGS)
10921 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
10922 & ~RELOAD_REG_AND_M16);
10924 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
10925 addr_mask &= ~(RELOAD_REG_INDEXED
10926 | RELOAD_REG_PRE_INCDEC
10927 | RELOAD_REG_PRE_MODIFY);
10930 else
10931 addr_mask = 0;
10933 /* If the register isn't valid in this register class, just return now. */
10934 if ((addr_mask & RELOAD_REG_VALID) == 0)
10936 if (TARGET_DEBUG_ADDR)
10938 fprintf (stderr,
10939 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
10940 "not valid in class\n",
10941 GET_MODE_NAME (mode), reg_class_names[rclass]);
10942 debug_rtx (addr);
10945 return -1;
/* Dispatch on the outermost address form.  */
10948 switch (GET_CODE (addr))
10950 /* Does the register class supports auto update forms for this mode? We
10951 don't need a scratch register, since the powerpc only supports
10952 PRE_INC, PRE_DEC, and PRE_MODIFY. */
10953 case PRE_INC:
10954 case PRE_DEC:
10955 reg = XEXP (addr, 0);
10956 if (!base_reg_operand (addr, GET_MODE (reg)))
10958 fail_msg = "no base register #1";
10959 extra_cost = -1;
10962 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
10964 extra_cost = 1;
10965 type = "update";
10967 break;
10969 case PRE_MODIFY:
10970 reg = XEXP (addr, 0);
10971 plus_arg1 = XEXP (addr, 1);
10972 if (!base_reg_operand (reg, GET_MODE (reg))
10973 || GET_CODE (plus_arg1) != PLUS
10974 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
10976 fail_msg = "bad PRE_MODIFY";
10977 extra_cost = -1;
10980 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
10982 extra_cost = 1;
10983 type = "update";
10985 break;
10987 /* Do we need to simulate AND -16 to clear the bottom address bits used
10988 in VMX load/stores? Only allow the AND for vector sizes. */
10989 case AND:
10990 and_arg = XEXP (addr, 0);
10991 if (GET_MODE_SIZE (mode) != 16
10992 || !CONST_INT_P (XEXP (addr, 1))
10993 || INTVAL (XEXP (addr, 1)) != -16)
10995 fail_msg = "bad Altivec AND #1";
10996 extra_cost = -1;
10999 if (rclass != ALTIVEC_REGS)
11001 if (legitimate_indirect_address_p (and_arg, false))
11002 extra_cost = 1;
11004 else if (legitimate_indexed_address_p (and_arg, false))
11005 extra_cost = 2;
11007 else
11009 fail_msg = "bad Altivec AND #2";
11010 extra_cost = -1;
11013 type = "and";
11015 break;
11017 /* If this is an indirect address, make sure it is a base register. */
11018 case REG:
11019 case SUBREG:
11020 if (!legitimate_indirect_address_p (addr, false))
11022 extra_cost = 1;
11023 type = "move";
11025 break;
11027 /* If this is an indexed address, make sure the register class can handle
11028 indexed addresses for this mode. */
11029 case PLUS:
11030 plus_arg0 = XEXP (addr, 0);
11031 plus_arg1 = XEXP (addr, 1);
11033 /* (plus (plus (reg) (constant)) (constant)) is generated during
11034 push_reload processing, so handle it now. */
11035 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
11037 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11039 extra_cost = 1;
11040 type = "offset";
11044 /* (plus (plus (reg) (constant)) (reg)) is also generated during
11045 push_reload processing, so handle it now. */
11046 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
11048 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11050 extra_cost = 1;
11051 type = "indexed #2";
11055 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
11057 fail_msg = "no base register #2";
11058 extra_cost = -1;
11061 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
11063 if ((addr_mask & RELOAD_REG_INDEXED) == 0
11064 || !legitimate_indexed_address_p (addr, false))
11066 extra_cost = 1;
11067 type = "indexed";
/* Quad-offset (DQ-form) instructions restrict the constant offset.  */
11071 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
11072 && CONST_INT_P (plus_arg1))
11074 if (!quad_address_offset_p (INTVAL (plus_arg1)))
11076 extra_cost = 1;
11077 type = "vector d-form offset";
11081 /* Make sure the register class can handle offset addresses. */
11082 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11084 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11086 extra_cost = 1;
11087 type = "offset #2";
11091 else
11093 fail_msg = "bad PLUS";
11094 extra_cost = -1;
11097 break;
11099 case LO_SUM:
11100 /* Quad offsets are restricted and can't handle normal addresses. */
11101 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11103 extra_cost = -1;
11104 type = "vector d-form lo_sum";
11107 else if (!legitimate_lo_sum_address_p (mode, addr, false))
11109 fail_msg = "bad LO_SUM";
11110 extra_cost = -1;
11113 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11115 extra_cost = 1;
11116 type = "lo_sum";
11118 break;
11120 /* Static addresses need to create a TOC entry. */
11121 case CONST:
11122 case SYMBOL_REF:
11123 case LABEL_REF:
11124 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11126 extra_cost = -1;
11127 type = "vector d-form lo_sum #2";
11130 else
11132 type = "address";
11133 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
11135 break;
11137 /* TOC references look like offsetable memory. */
11138 case UNSPEC:
11139 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
11141 fail_msg = "bad UNSPEC";
11142 extra_cost = -1;
11145 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11147 extra_cost = -1;
11148 type = "vector d-form lo_sum #3";
11151 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11153 extra_cost = 1;
11154 type = "toc reference";
11156 break;
11158 default:
11160 fail_msg = "bad address";
11161 extra_cost = -1;
/* Optional debug dump of the decision just made.  */
11165 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
11167 if (extra_cost < 0)
11168 fprintf (stderr,
11169 "rs6000_secondary_reload_memory error: mode = %s, "
11170 "class = %s, addr_mask = '%s', %s\n",
11171 GET_MODE_NAME (mode),
11172 reg_class_names[rclass],
11173 rs6000_debug_addr_mask (addr_mask, false),
11174 (fail_msg != NULL) ? fail_msg : "<bad address>");
11176 else
11177 fprintf (stderr,
11178 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11179 "addr_mask = '%s', extra cost = %d, %s\n",
11180 GET_MODE_NAME (mode),
11181 reg_class_names[rclass],
11182 rs6000_debug_addr_mask (addr_mask, false),
11183 extra_cost,
11184 (type) ? type : "<none>");
11186 debug_rtx (addr);
11189 return extra_cost;
11192 /* Helper function for rs6000_secondary_reload to return true if a move to a
11193 different register class is really a simple move (no scratch register or
11194 memory needed; one or more plain move insns suffice). */
/* NOTE(review): this listing lost its brace/blank lines during extraction;
   code lines below are kept verbatim and only comments are added.  */
11195 static bool
11196 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
11197 enum rs6000_reg_type from_type,
11198 machine_mode mode)
11200 int size = GET_MODE_SIZE (mode);
11202 /* Add support for various direct moves available. In this function, we only
11203 look at cases where we don't need any extra registers, and one or more
11204 simple move insns are issued. Originally small integers are not allowed
11205 in FPR/VSX registers. Single precision binary floating is not a simple
11206 move because we need to convert to the single precision memory layout.
11207 The 4-byte SDmode can be moved. TDmode values are disallowed since they
11208 need special direct move handling, which we do not support yet. */
11209 if (TARGET_DIRECT_MOVE
11210 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11211 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
11213 if (TARGET_POWERPC64)
11215 /* ISA 2.07: MTVSRD or MVFVSRD. */
11216 if (size == 8)
11217 return true;
11219 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
11220 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
11221 return true;
11224 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11225 if (TARGET_P8_VECTOR)
11227 if (mode == SImode)
11228 return true;
11230 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
11231 return true;
11234 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11235 if (mode == SDmode)
11236 return true;
11239 /* Move to/from SPR. */
11240 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
11241 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
11242 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
11243 return true;
11245 return false;
11248 /* Direct move helper function for rs6000_secondary_reload, handle all of the
11249 special direct moves that involve allocating an extra register, return the
11250 insn code of the helper function if there is such a function or
11251 CODE_FOR_nothing if not. */
/* NOTE(review): this listing lost its brace/blank lines during extraction;
   code lines below are kept verbatim and only comments are added.
   Returns true (and fills SRI's icode/extra_cost, when SRI is non-null)
   if a multi-step direct-move pattern exists for this type pair.  */
11253 static bool
11254 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
11255 enum rs6000_reg_type from_type,
11256 machine_mode mode,
11257 secondary_reload_info *sri,
11258 bool altivec_p)
11260 bool ret = false;
11261 enum insn_code icode = CODE_FOR_nothing;
11262 int cost = 0;
11263 int size = GET_MODE_SIZE (mode);
11265 if (TARGET_POWERPC64 && size == 16)
11267 /* Handle moving 128-bit values from GPRs to VSX point registers on
11268 ISA 2.07 (power8, power9) when running in 64-bit mode using
11269 XXPERMDI to glue the two 64-bit values back together. */
11270 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11272 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
11273 icode = reg_addr[mode].reload_vsx_gpr;
11276 /* Handle moving 128-bit values from VSX point registers to GPRs on
11277 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
11278 bottom 64-bit value. */
11279 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11281 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
11282 icode = reg_addr[mode].reload_gpr_vsx;
/* SFmode needs a layout conversion as part of the move.  */
11286 else if (TARGET_POWERPC64 && mode == SFmode)
11288 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11290 cost = 3; /* xscvdpspn, mfvsrd, and. */
11291 icode = reg_addr[mode].reload_gpr_vsx;
11294 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11296 cost = 2; /* mtvsrz, xscvspdpn. */
11297 icode = reg_addr[mode].reload_vsx_gpr;
11301 else if (!TARGET_POWERPC64 && size == 8)
11303 /* Handle moving 64-bit values from GPRs to floating point registers on
11304 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
11305 32-bit values back together. Altivec register classes must be handled
11306 specially since a different instruction is used, and the secondary
11307 reload support requires a single instruction class in the scratch
11308 register constraint. However, right now TFmode is not allowed in
11309 Altivec registers, so the pattern will never match. */
11310 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
11312 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
11313 icode = reg_addr[mode].reload_fpr_gpr;
/* Report the chosen pattern and its cost through SRI, if requested.  */
11317 if (icode != CODE_FOR_nothing)
11319 ret = true;
11320 if (sri)
11322 sri->icode = icode;
11323 sri->extra_cost = cost;
11327 return ret;
11330 /* Return whether a move between two register classes can be done either
11331 directly (simple move) or via a pattern that uses a single extra temporary
11332 (using ISA 2.07's direct move in this case. */
11334 static bool
11335 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
11336 enum rs6000_reg_type from_type,
11337 machine_mode mode,
11338 secondary_reload_info *sri,
11339 bool altivec_p)
11341 /* Fall back to load/store reloads if either type is not a register. */
11342 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
11343 return false;
11345 /* If we haven't allocated registers yet, assume the move can be done for the
11346 standard register types. */
11347 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
11348 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
11349 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
11350 return true;
11352 /* Moves to the same set of registers is a simple move for non-specialized
11353 registers. */
11354 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
11355 return true;
11357 /* Check whether a simple move can be done directly. */
11358 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
11360 if (sri)
11362 sri->icode = CODE_FOR_nothing;
11363 sri->extra_cost = 0;
11365 return true;
11368 /* Now check if we can do it in a few steps. */
11369 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
11370 altivec_p);
11373 /* Inform reload about cases where moving X with a mode MODE to a register in
11374 RCLASS requires an extra scratch or immediate register. Return the class
11375 needed for the immediate register.
11377 For VSX and Altivec, we may need a register to convert sp+offset into
11378 reg+sp.
11380 For misaligned 64-bit gpr loads and stores we need a register to
11381 convert an offset address to indirect. */
11383 static reg_class_t
11384 rs6000_secondary_reload (bool in_p,
11385 rtx x,
11386 reg_class_t rclass_i,
11387 machine_mode mode,
11388 secondary_reload_info *sri)
/* RET starts as the sentinel ALL_REGS and must be replaced by one of the
   cases below (checked by the gcc_assert near the end).  DONE_P records
   that some special case made the decision; DEFAULT_P defers to the
   generic default_secondary_reload hook.  */
11390 enum reg_class rclass = (enum reg_class) rclass_i;
11391 reg_class_t ret = ALL_REGS;
11392 enum insn_code icode;
11393 bool default_p = false;
11394 bool done_p = false;
11396 /* Allow subreg of memory before/during reload. */
11397 bool memory_p = (MEM_P (x)
11398 || (!reload_completed && SUBREG_P (x)
11399 && MEM_P (SUBREG_REG (x))));
/* Reset the reload info and select the per-mode reload helper insn for
   either a load or a store.  */
11401 sri->icode = CODE_FOR_nothing;
11402 sri->t_icode = CODE_FOR_nothing;
11403 sri->extra_cost = 0;
11404 icode = ((in_p)
11405 ? reg_addr[mode].reload_load
11406 : reg_addr[mode].reload_store);
/* Register-to-register reload: see whether a direct move between
   register banks can satisfy it without going through memory.  */
11408 if (REG_P (x) || register_operand (x, mode))
11410 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
11411 bool altivec_p = (rclass == ALTIVEC_REGS);
11412 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
11414 if (!in_p)
11415 std::swap (to_type, from_type);
11417 /* Can we do a direct move of some sort? */
11418 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
11419 altivec_p))
11421 icode = (enum insn_code)sri->icode;
11422 default_p = false;
11423 done_p = true;
11424 ret = NO_REGS;
11428 /* Make sure 0.0 is not reloaded or forced into memory. */
11429 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
11431 ret = NO_REGS;
11432 default_p = false;
11433 done_p = true;
11436 /* If this is a scalar floating point value and we want to load it into the
11437 traditional Altivec registers, do it via a move via a traditional floating
11438 point register, unless we have D-form addressing. Also make sure that
11439 non-zero constants use a FPR. */
11440 if (!done_p && reg_addr[mode].scalar_in_vmx_p
11441 && !mode_supports_vmx_dform (mode)
11442 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
11443 && (memory_p || CONST_DOUBLE_P (x)))
11445 ret = FLOAT_REGS;
11446 default_p = false;
11447 done_p = true;
11450 /* Handle reload of load/stores if we have reload helper functions. */
11451 if (!done_p && icode != CODE_FOR_nothing && memory_p)
11453 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
11454 mode);
/* A non-negative cost means the helper insn can fix up this address;
   a positive cost additionally requires a scratch register.  */
11456 if (extra_cost >= 0)
11458 done_p = true;
11459 ret = NO_REGS;
11460 if (extra_cost > 0)
11462 sri->extra_cost = extra_cost;
11463 sri->icode = icode;
11468 /* Handle unaligned loads and stores of integer registers. */
11469 if (!done_p && TARGET_POWERPC64
11470 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11471 && memory_p
11472 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
11474 rtx addr = XEXP (x, 0);
11475 rtx off = address_offset (addr);
11477 if (off != NULL_RTX)
11479 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11480 unsigned HOST_WIDE_INT offset = INTVAL (off);
11482 /* We need a secondary reload when our legitimate_address_p
11483 says the address is good (as otherwise the entire address
11484 will be reloaded), and the offset is not a multiple of
11485 four or we have an address wrap. Address wrap will only
11486 occur for LO_SUMs since legitimate_offset_address_p
11487 rejects addresses for 16-byte mems that will wrap. */
11488 if (GET_CODE (addr) == LO_SUM
11489 ? (1 /* legitimate_address_p allows any offset for lo_sum */
11490 && ((offset & 3) != 0
11491 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
11492 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
11493 && (offset & 3) != 0))
11495 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
11496 if (in_p)
11497 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
11498 : CODE_FOR_reload_di_load);
11499 else
11500 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
11501 : CODE_FOR_reload_di_store);
11502 sri->extra_cost = 2;
11503 ret = NO_REGS;
11504 done_p = true;
11506 else
11507 default_p = true;
11509 else
11510 default_p = true;
/* Multi-word values in 32-bit GPRs: catch offsets that would wrap when
   the address is advanced word by word for the later registers.  */
11513 if (!done_p && !TARGET_POWERPC64
11514 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11515 && memory_p
11516 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
11518 rtx addr = XEXP (x, 0);
11519 rtx off = address_offset (addr);
11521 if (off != NULL_RTX)
11523 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11524 unsigned HOST_WIDE_INT offset = INTVAL (off);
11526 /* We need a secondary reload when our legitimate_address_p
11527 says the address is good (as otherwise the entire address
11528 will be reloaded), and we have a wrap.
11530 legitimate_lo_sum_address_p allows LO_SUM addresses to
11531 have any offset so test for wrap in the low 16 bits.
11533 legitimate_offset_address_p checks for the range
11534 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
11535 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
11536 [0x7ff4,0x7fff] respectively, so test for the
11537 intersection of these ranges, [0x7ffc,0x7fff] and
11538 [0x7ff4,0x7ff7] respectively.
11540 Note that the address we see here may have been
11541 manipulated by legitimize_reload_address. */
11542 if (GET_CODE (addr) == LO_SUM
11543 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
11544 : offset - (0x8000 - extra) < UNITS_PER_WORD)
11546 if (in_p)
11547 sri->icode = CODE_FOR_reload_si_load;
11548 else
11549 sri->icode = CODE_FOR_reload_si_store;
11550 sri->extra_cost = 2;
11551 ret = NO_REGS;
11552 done_p = true;
11554 else
11555 default_p = true;
11557 else
11558 default_p = true;
/* Nothing special applied: fall back to the generic hook.  */
11561 if (!done_p)
11562 default_p = true;
11564 if (default_p)
11565 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
/* ALL_REGS was only a sentinel; every path above must have replaced it.  */
11567 gcc_assert (ret != ALL_REGS);
11569 if (TARGET_DEBUG_ADDR)
11571 fprintf (stderr,
11572 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
11573 "mode = %s",
11574 reg_class_names[ret],
11575 in_p ? "true" : "false",
11576 reg_class_names[rclass],
11577 GET_MODE_NAME (mode));
11579 if (reload_completed)
11580 fputs (", after reload", stderr);
11582 if (!done_p)
11583 fputs (", done_p not set", stderr);
11585 if (default_p)
11586 fputs (", default secondary reload", stderr);
11588 if (sri->icode != CODE_FOR_nothing)
11589 fprintf (stderr, ", reload func = %s, extra cost = %d",
11590 insn_data[sri->icode].name, sri->extra_cost);
11592 else if (sri->extra_cost > 0)
11593 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
11595 fputs ("\n", stderr);
11596 debug_rtx (x);
11599 return ret;
11602 /* Better tracing for rs6000_secondary_reload_inner. */
11604 static void
11605 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
11606 bool store_p)
11608 rtx set, clobber;
11610 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
11612 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
11613 store_p ? "store" : "load");
11615 if (store_p)
11616 set = gen_rtx_SET (mem, reg);
11617 else
11618 set = gen_rtx_SET (reg, mem);
11620 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11621 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
/* Report an internal failure in rs6000_secondary_reload_inner: dump the
   offending reload via rs6000_secondary_reload_trace, then abort.  */
11624 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
11625 ATTRIBUTE_NORETURN;
11627 static void
11628 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
11629 bool store_p)
11631 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
/* Only reached for address forms the reload helpers cannot handle,
   which indicates a compiler bug.  */
11632 gcc_unreachable ();
11635 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
11636 reload helper functions. These were identified in
11637 rs6000_secondary_reload_memory, and if reload decided to use the secondary
11638 reload, it calls the insns:
11639 reload_<RELOAD:mode>_<P:mptrsize>_store
11640 reload_<RELOAD:mode>_<P:mptrsize>_load
11642 which in turn calls this function, to do whatever is necessary to create
11643 valid addresses. */
11645 void
11646 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
11648 int regno = true_regnum (reg);
11649 machine_mode mode = GET_MODE (reg);
11650 addr_mask_type addr_mask;
11651 rtx addr;
11652 rtx new_addr;
11653 rtx op_reg, op0, op1;
11654 rtx and_op;
11655 rtx cc_clobber;
11656 rtvec rv;
/* Sanity check the operands: REG must be a hard register, MEM a memory
   reference, and SCRATCH usable as a base register.  */
11658 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
11659 || !base_reg_operand (scratch, GET_MODE (scratch)))
11660 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
/* Select the addressing-capability mask for the register bank REG lives
   in (GPR, FPR, or Altivec).  */
11662 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
11663 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
11665 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
11666 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
11668 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
11669 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
11671 else
11672 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11674 /* Make sure the mode is valid in this register class. */
11675 if ((addr_mask & RELOAD_REG_VALID) == 0)
11676 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11678 if (TARGET_DEBUG_ADDR)
11679 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
/* NEW_ADDR is rewritten below if the original address needs fixing; if it
   ends up different from ADDR the MEM is re-addressed before the move.  */
11681 new_addr = addr = XEXP (mem, 0);
11682 switch (GET_CODE (addr))
11684 /* Does the register class support auto update forms for this mode? If
11685 not, do the update now. We don't need a scratch register, since the
11686 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
11687 case PRE_INC:
11688 case PRE_DEC:
11689 op_reg = XEXP (addr, 0);
11690 if (!base_reg_operand (op_reg, Pmode))
11691 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11693 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11695 int delta = GET_MODE_SIZE (mode);
11696 if (GET_CODE (addr) == PRE_DEC)
11697 delta = -delta;
11698 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
11699 new_addr = op_reg;
11701 break;
11703 case PRE_MODIFY:
11704 op0 = XEXP (addr, 0);
11705 op1 = XEXP (addr, 1);
11706 if (!base_reg_operand (op0, Pmode)
11707 || GET_CODE (op1) != PLUS
11708 || !rtx_equal_p (op0, XEXP (op1, 0)))
11709 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
/* Perform the update by hand, then address the memory through the
   updated base register.  */
11711 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11713 emit_insn (gen_rtx_SET (op0, op1));
11714 new_addr = reg;
11716 break;
11718 /* Do we need to simulate AND -16 to clear the bottom address bits used
11719 in VMX load/stores? */
11720 case AND:
11721 op0 = XEXP (addr, 0);
11722 op1 = XEXP (addr, 1);
11723 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
11725 if (REG_P (op0) || SUBREG_P (op0))
11726 op_reg = op0;
11728 else if (GET_CODE (op1) == PLUS)
11730 emit_insn (gen_rtx_SET (scratch, op1))
11731 op_reg = scratch;
11734 else
11735 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
/* Emit an explicit AND into the scratch register; the AND clobbers
   the condition register, so the pattern includes a CC clobber.  */
11737 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
11738 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
11739 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
11740 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
11741 new_addr = scratch;
11743 break;
11745 /* If this is an indirect address, make sure it is a base register. */
11746 case REG:
11747 case SUBREG:
11748 if (!base_reg_operand (addr, GET_MODE (addr)))
11750 emit_insn (gen_rtx_SET (scratch, addr));
11751 new_addr = scratch;
11753 break;
11755 /* If this is an indexed address, make sure the register class can handle
11756 indexed addresses for this mode. */
11757 case PLUS:
11758 op0 = XEXP (addr, 0);
11759 op1 = XEXP (addr, 1);
11760 if (!base_reg_operand (op0, Pmode))
11761 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11763 else if (int_reg_operand (op1, Pmode))
11765 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11767 emit_insn (gen_rtx_SET (scratch, addr));
11768 new_addr = scratch;
/* DS/DQ-form offsets must satisfy extra alignment constraints;
   otherwise compute the full address into the scratch.  */
11772 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
11774 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
11775 || !quad_address_p (addr, mode, false))
11777 emit_insn (gen_rtx_SET (scratch, addr));
11778 new_addr = scratch;
11782 /* Make sure the register class can handle offset addresses. */
11783 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11785 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11787 emit_insn (gen_rtx_SET (scratch, addr));
11788 new_addr = scratch;
11792 else
11793 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11795 break;
11797 case LO_SUM:
11798 op0 = XEXP (addr, 0);
11799 op1 = XEXP (addr, 1);
11800 if (!base_reg_operand (op0, Pmode))
11801 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11803 else if (int_reg_operand (op1, Pmode))
11805 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11807 emit_insn (gen_rtx_SET (scratch, addr));
11808 new_addr = scratch;
11812 /* Quad offsets are restricted and can't handle normal addresses. */
11813 else if (mode_supports_dq_form (mode))
11815 emit_insn (gen_rtx_SET (scratch, addr));
11816 new_addr = scratch;
11819 /* Make sure the register class can handle offset addresses. */
11820 else if (legitimate_lo_sum_address_p (mode, addr, false))
11822 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11824 emit_insn (gen_rtx_SET (scratch, addr));
11825 new_addr = scratch;
11829 else
11830 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11832 break;
/* Symbolic addresses: materialize the address in the scratch register.  */
11834 case SYMBOL_REF:
11835 case CONST:
11836 case LABEL_REF:
11837 rs6000_emit_move (scratch, addr, Pmode);
11838 new_addr = scratch;
11839 break;
11841 default:
11842 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11845 /* Adjust the address if it changed. */
11846 if (addr != new_addr)
11848 mem = replace_equiv_address_nv (mem, new_addr);
11849 if (TARGET_DEBUG_ADDR)
11850 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
11853 /* Now create the move. */
11854 if (store_p)
11855 emit_insn (gen_rtx_SET (mem, reg));
11856 else
11857 emit_insn (gen_rtx_SET (reg, mem));
11859 return;
11862 /* Convert reloads involving 64-bit gprs and misaligned offset
11863 addressing, or multiple 32-bit gprs and offsets that are too large,
11864 to use indirect addressing. */
11866 void
11867 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
11869 int regno = true_regnum (reg);
11870 enum reg_class rclass;
11871 rtx addr;
11872 rtx scratch_or_premodify = scratch;
11874 if (TARGET_DEBUG_ADDR)
11876 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
11877 store_p ? "store" : "load");
11878 fprintf (stderr, "reg:\n");
11879 debug_rtx (reg);
11880 fprintf (stderr, "mem:\n");
11881 debug_rtx (mem);
11882 fprintf (stderr, "scratch:\n");
11883 debug_rtx (scratch);
11886 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
11887 gcc_assert (MEM_P (mem));
11888 rclass = REGNO_REG_CLASS (regno);
11889 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
11890 addr = XEXP (mem, 0);
11892 if (GET_CODE (addr) == PRE_MODIFY)
11894 gcc_assert (REG_P (XEXP (addr, 0))
11895 && GET_CODE (XEXP (addr, 1)) == PLUS
11896 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
11897 scratch_or_premodify = XEXP (addr, 0);
11898 addr = XEXP (addr, 1);
11900 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
11902 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
11904 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
11906 /* Now create the move. */
11907 if (store_p)
11908 emit_insn (gen_rtx_SET (mem, reg));
11909 else
11910 emit_insn (gen_rtx_SET (reg, mem));
11912 return;
11915 /* Given an rtx X being reloaded into a reg required to be
11916 in class CLASS, return the class of reg to actually use.
11917 In general this is just CLASS; but on some machines
11918 in some cases it is preferable to use a more restrictive class.
11920 On the RS/6000, we have to return NO_REGS when we want to reload a
11921 floating-point CONST_DOUBLE to force it to be copied to memory.
11923 We also don't want to reload integer values into floating-point
11924 registers if we can at all help it. In fact, this can
11925 cause reload to die, if it tries to generate a reload of CTR
11926 into a FP register and discovers it doesn't have the memory location
11927 required.
11929 ??? Would it be a good idea to have reload do the converse, that is
11930 try to reload floating modes into FP registers if possible?
11933 static enum reg_class
11934 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
11936 machine_mode mode = GET_MODE (x);
11937 bool is_constant = CONSTANT_P (x);
11939 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
11940 reload class for it. */
11941 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
11942 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
11943 return NO_REGS;
11945 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
11946 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
11947 return NO_REGS;
11949 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
11950 the reloading of address expressions using PLUS into floating point
11951 registers. */
11952 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
11954 if (is_constant)
11956 /* Zero is always allowed in all VSX registers. */
11957 if (x == CONST0_RTX (mode))
11958 return rclass;
11960 /* If this is a vector constant that can be formed with a few Altivec
11961 instructions, we want altivec registers. */
11962 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
11963 return ALTIVEC_REGS;
11965 /* If this is an integer constant that can easily be loaded into
11966 vector registers, allow it. */
11967 if (CONST_INT_P (x))
11969 HOST_WIDE_INT value = INTVAL (x);
11971 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
11972 2.06 can generate it in the Altivec registers with
11973 VSPLTI<x>. */
11974 if (value == -1)
11976 if (TARGET_P8_VECTOR)
11977 return rclass;
11978 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
11979 return ALTIVEC_REGS;
11980 else
11981 return NO_REGS;
11984 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
11985 a sign extend in the Altivec registers. */
11986 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
11987 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
11988 return ALTIVEC_REGS;
/* No cheap instruction sequence for this constant in VSX registers.  */
11991 /* Force constant to memory. */
11992 return NO_REGS;
11995 /* D-form addressing can easily reload the value. */
11996 if (mode_supports_vmx_dform (mode)
11997 || mode_supports_dq_form (mode))
11998 return rclass;
12000 /* If this is a scalar floating point value and we don't have D-form
12001 addressing, prefer the traditional floating point registers so that we
12002 can use D-form (register+offset) addressing. */
12003 if (rclass == VSX_REGS
12004 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
12005 return FLOAT_REGS;
12007 /* Prefer the Altivec registers if Altivec is handling the vector
12008 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
12009 loads. */
12010 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
12011 || mode == V1TImode)
12012 return ALTIVEC_REGS;
12014 return rclass;
/* Constants and address expressions are best reloaded through integer
   registers.  */
12017 if (is_constant || GET_CODE (x) == PLUS)
12019 if (reg_class_subset_p (GENERAL_REGS, rclass))
12020 return GENERAL_REGS;
12021 if (reg_class_subset_p (BASE_REGS, rclass))
12022 return BASE_REGS;
12023 return NO_REGS;
/* Keep integer values out of the floating point registers.  */
12026 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS)
12027 return GENERAL_REGS;
12029 return rclass;
12032 /* Debug version of rs6000_preferred_reload_class. */
12033 static enum reg_class
12034 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
12036 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
12038 fprintf (stderr,
12039 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
12040 "mode = %s, x:\n",
12041 reg_class_names[ret], reg_class_names[rclass],
12042 GET_MODE_NAME (GET_MODE (x)));
12043 debug_rtx (x);
12045 return ret;
12048 /* If we are copying between FP or AltiVec registers and anything else, we need
12049 a memory location. The exception is when we are targeting ppc64 and the
12050 move to/from fpr to gpr instructions are available. Also, under VSX, you
12051 can copy vector registers from the FP register set to the Altivec register
12052 set and vice versa. */
12054 static bool
12055 rs6000_secondary_memory_needed (machine_mode mode,
12056 reg_class_t from_class,
12057 reg_class_t to_class)
12059 enum rs6000_reg_type from_type, to_type;
12060 bool altivec_p = ((from_class == ALTIVEC_REGS)
12061 || (to_class == ALTIVEC_REGS));
12063 /* If a simple/direct move is available, we don't need secondary memory */
12064 from_type = reg_class_to_reg_type[(int)from_class];
12065 to_type = reg_class_to_reg_type[(int)to_class];
12067 if (rs6000_secondary_reload_move (to_type, from_type, mode,
12068 (secondary_reload_info *)0, altivec_p))
12069 return false;
12071 /* If we have a floating point or vector register class, we need to use
12072 memory to transfer the data. */
12073 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
12074 return true;
12076 return false;
12079 /* Debug version of rs6000_secondary_memory_needed. */
12080 static bool
12081 rs6000_debug_secondary_memory_needed (machine_mode mode,
12082 reg_class_t from_class,
12083 reg_class_t to_class)
12085 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
12087 fprintf (stderr,
12088 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
12089 "to_class = %s, mode = %s\n",
12090 ret ? "true" : "false",
12091 reg_class_names[from_class],
12092 reg_class_names[to_class],
12093 GET_MODE_NAME (mode));
12095 return ret;
12098 /* Return the register class of a scratch register needed to copy IN into
12099 or out of a register in RCLASS in MODE. If it can be done directly,
12100 NO_REGS is returned. */
12102 static enum reg_class
12103 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
12104 rtx in)
/* REGNO is the hard register number of IN, or -1 if IN is not (a subreg
   of) a hard register.  */
12106 int regno;
12108 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
12109 #if TARGET_MACHO
12110 && MACHOPIC_INDIRECT
12111 #endif
12114 /* We cannot copy a symbolic operand directly into anything
12115 other than BASE_REGS for TARGET_ELF. So indicate that a
12116 register from BASE_REGS is needed as an intermediate
12117 register.
12119 On Darwin, pic addresses require a load from memory, which
12120 needs a base register. */
12121 if (rclass != BASE_REGS
12122 && (SYMBOL_REF_P (in)
12123 || GET_CODE (in) == HIGH
12124 || GET_CODE (in) == LABEL_REF
12125 || GET_CODE (in) == CONST))
12126 return BASE_REGS;
/* Resolve IN to a hard register number when possible; -1 means "not a
   hard register" (constant, memory, or an unallocated pseudo).  */
12129 if (REG_P (in))
12131 regno = REGNO (in);
12132 if (!HARD_REGISTER_NUM_P (regno))
12134 regno = true_regnum (in);
12135 if (!HARD_REGISTER_NUM_P (regno))
12136 regno = -1;
12139 else if (SUBREG_P (in))
12141 regno = true_regnum (in);
12142 if (!HARD_REGISTER_NUM_P (regno))
12143 regno = -1;
12145 else
12146 regno = -1;
12148 /* If we have VSX register moves, prefer moving scalar values between
12149 Altivec registers and GPR by going via an FPR (and then via memory)
12150 instead of reloading the secondary memory address for Altivec moves. */
12151 if (TARGET_VSX
12152 && GET_MODE_SIZE (mode) < 16
12153 && !mode_supports_vmx_dform (mode)
12154 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
12155 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
12156 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12157 && (regno >= 0 && INT_REGNO_P (regno)))))
12158 return FLOAT_REGS;
12160 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
12161 into anything. */
12162 if (rclass == GENERAL_REGS || rclass == BASE_REGS
12163 || (regno >= 0 && INT_REGNO_P (regno)))
12164 return NO_REGS;
12166 /* Constants, memory, and VSX registers can go into VSX registers (both the
12167 traditional floating point and the altivec registers). */
12168 if (rclass == VSX_REGS
12169 && (regno == -1 || VSX_REGNO_P (regno)))
12170 return NO_REGS;
12172 /* Constants, memory, and FP registers can go into FP registers. */
12173 if ((regno == -1 || FP_REGNO_P (regno))
12174 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS)
/* SDmode is special during reload (but not LRA): it needs a GPR
   intermediate.  */
12175 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
12177 /* Memory, and AltiVec registers can go into AltiVec registers. */
12178 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
12179 && rclass == ALTIVEC_REGS)
12180 return NO_REGS;
12182 /* We can copy among the CR registers. */
12183 if ((rclass == CR_REGS || rclass == CR0_REGS)
12184 && regno >= 0 && CR_REGNO_P (regno))
12185 return NO_REGS;
12187 /* Otherwise, we need GENERAL_REGS. */
12188 return GENERAL_REGS;
12191 /* Debug version of rs6000_secondary_reload_class. */
12192 static enum reg_class
12193 rs6000_debug_secondary_reload_class (enum reg_class rclass,
12194 machine_mode mode, rtx in)
12196 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
12197 fprintf (stderr,
12198 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
12199 "mode = %s, input rtx:\n",
12200 reg_class_names[ret], reg_class_names[rclass],
12201 GET_MODE_NAME (mode));
12202 debug_rtx (in);
12204 return ret;
12207 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
12209 static bool
12210 rs6000_can_change_mode_class (machine_mode from,
12211 machine_mode to,
12212 reg_class_t rclass)
12214 unsigned from_size = GET_MODE_SIZE (from);
12215 unsigned to_size = GET_MODE_SIZE (to);
/* Size-changing subregs are restricted in the FP/VSX register file.  */
12217 if (from_size != to_size)
12219 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
12221 if (reg_classes_intersect_p (xclass, rclass))
12223 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
12224 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
12225 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
12226 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
12228 /* Don't allow 64-bit types to overlap with 128-bit types that take a
12229 single register under VSX because the scalar part of the register
12230 is in the upper 64-bits, and not the lower 64-bits. Types like
12231 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
12232 IEEE floating point can't overlap, and neither can small
12233 values. */
12235 if (to_float128_vector_p && from_float128_vector_p)
12236 return true;
12238 else if (to_float128_vector_p || from_float128_vector_p)
12239 return false;
12241 /* TDmode in floating-mode registers must always go into a register
12242 pair with the most significant word in the even-numbered register
12243 to match ISA requirements. In little-endian mode, this does not
12244 match subreg numbering, so we cannot allow subregs. */
12245 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
12246 return false;
12248 if (from_size < 8 || to_size < 8)
12249 return false;
/* An 8-byte value may only overlap a larger type whose pieces are
   themselves 8-byte registers.  */
12251 if (from_size == 8 && (8 * to_nregs) != to_size)
12252 return false;
12254 if (to_size == 8 && (8 * from_nregs) != from_size)
12255 return false;
12257 return true;
12259 else
12260 return true;
12263 /* Since the VSX register set includes traditional floating point registers
12264 and altivec registers, just check for the size being different instead of
12265 trying to check whether the modes are vector modes. Otherwise it won't
12266 allow say DF and DI to change classes. For types like TFmode and TDmode
12267 that take 2 64-bit registers, rather than a single 128-bit register, don't
12268 allow subregs of those types to other 128 bit types. */
12269 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
12271 unsigned num_regs = (from_size + 15) / 16;
12272 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
12273 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
12274 return false;
12276 return (from_size == 8 || from_size == 16);
/* In pure Altivec registers, don't allow a vector mode to change to or
   from a non-vector mode.  */
12279 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
12280 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
12281 return false;
12283 return true;
12286 /* Debug version of rs6000_can_change_mode_class. */
12287 static bool
12288 rs6000_debug_can_change_mode_class (machine_mode from,
12289 machine_mode to,
12290 reg_class_t rclass)
12292 bool ret = rs6000_can_change_mode_class (from, to, rclass);
12294 fprintf (stderr,
12295 "rs6000_can_change_mode_class, return %s, from = %s, "
12296 "to = %s, rclass = %s\n",
12297 ret ? "true" : "false",
12298 GET_MODE_NAME (from), GET_MODE_NAME (to),
12299 reg_class_names[rclass]);
12301 return ret;
12304 /* Return a string to do a move operation of 128 bits of data. */
12306 const char *
12307 rs6000_output_move_128bit (rtx operands[])
12309 rtx dest = operands[0];
12310 rtx src = operands[1];
12311 machine_mode mode = GET_MODE (dest);
12312 int dest_regno;
12313 int src_regno;
12314 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
12315 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
/* Classify the destination: which register bank it is in, or -1 if it is
   not a register.  */
12317 if (REG_P (dest))
12319 dest_regno = REGNO (dest);
12320 dest_gpr_p = INT_REGNO_P (dest_regno);
12321 dest_fp_p = FP_REGNO_P (dest_regno);
12322 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
12323 dest_vsx_p = dest_fp_p | dest_vmx_p;
12325 else
12327 dest_regno = -1;
12328 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
/* Classify the source the same way.  */
12331 if (REG_P (src))
12333 src_regno = REGNO (src);
12334 src_gpr_p = INT_REGNO_P (src_regno);
12335 src_fp_p = FP_REGNO_P (src_regno);
12336 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
12337 src_vsx_p = src_fp_p | src_vmx_p;
12339 else
12341 src_regno = -1;
12342 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
12345 /* Register moves. */
12346 if (dest_regno >= 0 && src_regno >= 0)
12348 if (dest_gpr_p)
/* "#" tells the assembler-output machinery the insn must be split.  */
12350 if (src_gpr_p)
12351 return "#";
12353 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
12354 return (WORDS_BIG_ENDIAN
12355 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
12356 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
12358 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
12359 return "#";
12362 else if (TARGET_VSX && dest_vsx_p)
12364 if (src_vsx_p)
12365 return "xxlor %x0,%x1,%x1";
12367 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
12368 return (WORDS_BIG_ENDIAN
12369 ? "mtvsrdd %x0,%1,%L1"
12370 : "mtvsrdd %x0,%L1,%1");
12372 else if (TARGET_DIRECT_MOVE && src_gpr_p)
12373 return "#";
12376 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
12377 return "vor %0,%1,%1";
12379 else if (dest_fp_p && src_fp_p)
12380 return "#";
12383 /* Loads. */
12384 else if (dest_regno >= 0 && MEM_P (src))
12386 if (dest_gpr_p)
12388 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12389 return "lq %0,%1";
12390 else
12391 return "#";
12394 else if (TARGET_ALTIVEC && dest_vmx_p
12395 && altivec_indexed_or_indirect_operand (src, mode))
12396 return "lvx %0,%y1";
12398 else if (TARGET_VSX && dest_vsx_p)
12400 if (mode_supports_dq_form (mode)
12401 && quad_address_p (XEXP (src, 0), mode, true))
12402 return "lxv %x0,%1";
12404 else if (TARGET_P9_VECTOR)
12405 return "lxvx %x0,%y1";
12407 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12408 return "lxvw4x %x0,%y1";
12410 else
12411 return "lxvd2x %x0,%y1";
12414 else if (TARGET_ALTIVEC && dest_vmx_p)
12415 return "lvx %0,%y1";
12417 else if (dest_fp_p)
12418 return "#";
12421 /* Stores. */
12422 else if (src_regno >= 0 && MEM_P (dest))
12424 if (src_gpr_p)
12426 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12427 return "stq %1,%0";
12428 else
12429 return "#";
12432 else if (TARGET_ALTIVEC && src_vmx_p
12433 && altivec_indexed_or_indirect_operand (dest, mode))
12434 return "stvx %1,%y0";
12436 else if (TARGET_VSX && src_vsx_p)
12438 if (mode_supports_dq_form (mode)
12439 && quad_address_p (XEXP (dest, 0), mode, true))
12440 return "stxv %x1,%0";
12442 else if (TARGET_P9_VECTOR)
12443 return "stxvx %x1,%y0";
12445 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12446 return "stxvw4x %x1,%y0";
12448 else
12449 return "stxvd2x %x1,%y0";
12452 else if (TARGET_ALTIVEC && src_vmx_p)
12453 return "stvx %1,%y0";
12455 else if (src_fp_p)
12456 return "#";
12459 /* Constants. */
12460 else if (dest_regno >= 0
12461 && (CONST_INT_P (src)
12462 || CONST_WIDE_INT_P (src)
12463 || CONST_DOUBLE_P (src)
12464 || GET_CODE (src) == CONST_VECTOR))
/* Constants into GPRs always split; vector constants may have a direct
   sequence.  */
12466 if (dest_gpr_p)
12467 return "#";
12469 else if ((dest_vmx_p && TARGET_ALTIVEC)
12470 || (dest_vsx_p && TARGET_VSX))
12471 return output_vec_const_move (operands);
/* Any combination that falls through the cases above is an invalid
   128-bit move.  */
12474 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
12477 /* Validate a 128-bit move. */
12478 bool
12479 rs6000_move_128bit_ok_p (rtx operands[])
12481 machine_mode mode = GET_MODE (operands[0]);
12482 return (gpc_reg_operand (operands[0], mode)
12483 || gpc_reg_operand (operands[1], mode));
12486 /* Return true if a 128-bit move needs to be split. */
12487 bool
12488 rs6000_split_128bit_ok_p (rtx operands[])
12490 if (!reload_completed)
12491 return false;
12493 if (!gpr_or_gpr_p (operands[0], operands[1]))
12494 return false;
12496 if (quad_load_store_p (operands[0], operands[1]))
12497 return false;
12499 return true;
12503 /* Given a comparison operation, return the bit number in CCR to test. We
12504 know this is a valid comparison.
12506 SCC_P is 1 if this is for an scc. That means that %D will have been
12507 used instead of %C, so the bits will be in different places.
12509 Return -1 if OP isn't a valid comparison for some reason. */
12512 ccr_bit (rtx op, int scc_p)
12514 enum rtx_code code = GET_CODE (op);
12515 machine_mode cc_mode;
12516 int cc_regnum;
12517 int base_bit;
12518 rtx reg;
12520 if (!COMPARISON_P (op))
12521 return -1;
12523 reg = XEXP (op, 0);
12525 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
12526 return -1;
12528 cc_mode = GET_MODE (reg);
12529 cc_regnum = REGNO (reg);
12530 base_bit = 4 * (cc_regnum - CR0_REGNO);
12532 validate_condition_mode (code, cc_mode);
12534 /* When generating a sCOND operation, only positive conditions are
12535 allowed. */
12536 if (scc_p)
12537 switch (code)
12539 case EQ:
12540 case GT:
12541 case LT:
12542 case UNORDERED:
12543 case GTU:
12544 case LTU:
12545 break;
12546 default:
12547 return -1;
12550 switch (code)
12552 case NE:
12553 return scc_p ? base_bit + 3 : base_bit + 2;
12554 case EQ:
12555 return base_bit + 2;
12556 case GT: case GTU: case UNLE:
12557 return base_bit + 1;
12558 case LT: case LTU: case UNGE:
12559 return base_bit;
12560 case ORDERED: case UNORDERED:
12561 return base_bit + 3;
12563 case GE: case GEU:
12564 /* If scc, we will have done a cror to put the bit in the
12565 unordered position. So test that bit. For integer, this is ! LT
12566 unless this is an scc insn. */
12567 return scc_p ? base_bit + 3 : base_bit;
12569 case LE: case LEU:
12570 return scc_p ? base_bit + 3 : base_bit + 1;
12572 default:
12573 return -1;
12577 /* Return the GOT register. */
12580 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
12582 /* The second flow pass currently (June 1999) can't update
12583 regs_ever_live without disturbing other parts of the compiler, so
12584 update it here to make the prolog/epilogue code happy. */
12585 if (!can_create_pseudo_p ()
12586 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
12587 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
12589 crtl->uses_pic_offset_table = 1;
12591 return pic_offset_table_rtx;
12594 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
12596 /* Write out a function code label. */
12598 void
12599 rs6000_output_function_entry (FILE *file, const char *fname)
12601 if (fname[0] != '.')
12603 switch (DEFAULT_ABI)
12605 default:
12606 gcc_unreachable ();
12608 case ABI_AIX:
12609 if (DOT_SYMBOLS)
12610 putc ('.', file);
12611 else
12612 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
12613 break;
12615 case ABI_ELFv2:
12616 case ABI_V4:
12617 case ABI_DARWIN:
12618 break;
12622 RS6000_OUTPUT_BASENAME (file, fname);
12625 /* Print an operand. Recognize special options, documented below. */
12627 #if TARGET_ELF
12628 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
12629 only introduced by the linker, when applying the sda21
12630 relocation. */
12631 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
12632 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
12633 #else
12634 #define SMALL_DATA_RELOC "sda21"
12635 #define SMALL_DATA_REG 0
12636 #endif
12638 void
12639 print_operand (FILE *file, rtx x, int code)
12641 int i;
12642 unsigned HOST_WIDE_INT uval;
12644 switch (code)
12646 /* %a is output_address. */
12648 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
12649 output_operand. */
12651 case 'D':
12652 /* Like 'J' but get to the GT bit only. */
12653 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12655 output_operand_lossage ("invalid %%D value");
12656 return;
12659 /* Bit 1 is GT bit. */
12660 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
12662 /* Add one for shift count in rlinm for scc. */
12663 fprintf (file, "%d", i + 1);
12664 return;
12666 case 'e':
12667 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
12668 if (! INT_P (x))
12670 output_operand_lossage ("invalid %%e value");
12671 return;
12674 uval = INTVAL (x);
12675 if ((uval & 0xffff) == 0 && uval != 0)
12676 putc ('s', file);
12677 return;
12679 case 'E':
12680 /* X is a CR register. Print the number of the EQ bit of the CR */
12681 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12682 output_operand_lossage ("invalid %%E value");
12683 else
12684 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
12685 return;
12687 case 'f':
12688 /* X is a CR register. Print the shift count needed to move it
12689 to the high-order four bits. */
12690 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12691 output_operand_lossage ("invalid %%f value");
12692 else
12693 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
12694 return;
12696 case 'F':
12697 /* Similar, but print the count for the rotate in the opposite
12698 direction. */
12699 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12700 output_operand_lossage ("invalid %%F value");
12701 else
12702 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
12703 return;
12705 case 'G':
12706 /* X is a constant integer. If it is negative, print "m",
12707 otherwise print "z". This is to make an aze or ame insn. */
12708 if (!CONST_INT_P (x))
12709 output_operand_lossage ("invalid %%G value");
12710 else if (INTVAL (x) >= 0)
12711 putc ('z', file);
12712 else
12713 putc ('m', file);
12714 return;
12716 case 'h':
12717 /* If constant, output low-order five bits. Otherwise, write
12718 normally. */
12719 if (INT_P (x))
12720 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
12721 else
12722 print_operand (file, x, 0);
12723 return;
12725 case 'H':
12726 /* If constant, output low-order six bits. Otherwise, write
12727 normally. */
12728 if (INT_P (x))
12729 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
12730 else
12731 print_operand (file, x, 0);
12732 return;
12734 case 'I':
12735 /* Print `i' if this is a constant, else nothing. */
12736 if (INT_P (x))
12737 putc ('i', file);
12738 return;
12740 case 'j':
12741 /* Write the bit number in CCR for jump. */
12742 i = ccr_bit (x, 0);
12743 if (i == -1)
12744 output_operand_lossage ("invalid %%j code");
12745 else
12746 fprintf (file, "%d", i);
12747 return;
12749 case 'J':
12750 /* Similar, but add one for shift count in rlinm for scc and pass
12751 scc flag to `ccr_bit'. */
12752 i = ccr_bit (x, 1);
12753 if (i == -1)
12754 output_operand_lossage ("invalid %%J code");
12755 else
12756 /* If we want bit 31, write a shift count of zero, not 32. */
12757 fprintf (file, "%d", i == 31 ? 0 : i + 1);
12758 return;
12760 case 'k':
12761 /* X must be a constant. Write the 1's complement of the
12762 constant. */
12763 if (! INT_P (x))
12764 output_operand_lossage ("invalid %%k value");
12765 else
12766 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
12767 return;
12769 case 'K':
12770 /* X must be a symbolic constant on ELF. Write an
12771 expression suitable for an 'addi' that adds in the low 16
12772 bits of the MEM. */
12773 if (GET_CODE (x) == CONST)
12775 if (GET_CODE (XEXP (x, 0)) != PLUS
12776 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
12777 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
12778 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
12779 output_operand_lossage ("invalid %%K value");
12781 print_operand_address (file, x);
12782 fputs ("@l", file);
12783 return;
12785 /* %l is output_asm_label. */
12787 case 'L':
12788 /* Write second word of DImode or DFmode reference. Works on register
12789 or non-indexed memory only. */
12790 if (REG_P (x))
12791 fputs (reg_names[REGNO (x) + 1], file);
12792 else if (MEM_P (x))
12794 machine_mode mode = GET_MODE (x);
12795 /* Handle possible auto-increment. Since it is pre-increment and
12796 we have already done it, we can just use an offset of word. */
12797 if (GET_CODE (XEXP (x, 0)) == PRE_INC
12798 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
12799 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12800 UNITS_PER_WORD));
12801 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
12802 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12803 UNITS_PER_WORD));
12804 else
12805 output_address (mode, XEXP (adjust_address_nv (x, SImode,
12806 UNITS_PER_WORD),
12807 0));
12809 if (small_data_operand (x, GET_MODE (x)))
12810 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
12811 reg_names[SMALL_DATA_REG]);
12813 return;
12815 case 'N': /* Unused */
12816 /* Write the number of elements in the vector times 4. */
12817 if (GET_CODE (x) != PARALLEL)
12818 output_operand_lossage ("invalid %%N value");
12819 else
12820 fprintf (file, "%d", XVECLEN (x, 0) * 4);
12821 return;
12823 case 'O': /* Unused */
12824 /* Similar, but subtract 1 first. */
12825 if (GET_CODE (x) != PARALLEL)
12826 output_operand_lossage ("invalid %%O value");
12827 else
12828 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
12829 return;
12831 case 'p':
12832 /* X is a CONST_INT that is a power of two. Output the logarithm. */
12833 if (! INT_P (x)
12834 || INTVAL (x) < 0
12835 || (i = exact_log2 (INTVAL (x))) < 0)
12836 output_operand_lossage ("invalid %%p value");
12837 else
12838 fprintf (file, "%d", i);
12839 return;
12841 case 'P':
12842 /* The operand must be an indirect memory reference. The result
12843 is the register name. */
12844 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
12845 || REGNO (XEXP (x, 0)) >= 32)
12846 output_operand_lossage ("invalid %%P value");
12847 else
12848 fputs (reg_names[REGNO (XEXP (x, 0))], file);
12849 return;
12851 case 'q':
12852 /* This outputs the logical code corresponding to a boolean
12853 expression. The expression may have one or both operands
12854 negated (if one, only the first one). For condition register
12855 logical operations, it will also treat the negated
12856 CR codes as NOTs, but not handle NOTs of them. */
12858 const char *const *t = 0;
12859 const char *s;
12860 enum rtx_code code = GET_CODE (x);
12861 static const char * const tbl[3][3] = {
12862 { "and", "andc", "nor" },
12863 { "or", "orc", "nand" },
12864 { "xor", "eqv", "xor" } };
12866 if (code == AND)
12867 t = tbl[0];
12868 else if (code == IOR)
12869 t = tbl[1];
12870 else if (code == XOR)
12871 t = tbl[2];
12872 else
12873 output_operand_lossage ("invalid %%q value");
12875 if (GET_CODE (XEXP (x, 0)) != NOT)
12876 s = t[0];
12877 else
12879 if (GET_CODE (XEXP (x, 1)) == NOT)
12880 s = t[2];
12881 else
12882 s = t[1];
12885 fputs (s, file);
12887 return;
12889 case 'Q':
12890 if (! TARGET_MFCRF)
12891 return;
12892 fputc (',', file);
12893 /* FALLTHRU */
12895 case 'R':
12896 /* X is a CR register. Print the mask for `mtcrf'. */
12897 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12898 output_operand_lossage ("invalid %%R value");
12899 else
12900 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
12901 return;
12903 case 's':
12904 /* Low 5 bits of 32 - value */
12905 if (! INT_P (x))
12906 output_operand_lossage ("invalid %%s value");
12907 else
12908 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
12909 return;
12911 case 't':
12912 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
12913 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12915 output_operand_lossage ("invalid %%t value");
12916 return;
12919 /* Bit 3 is OV bit. */
12920 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
12922 /* If we want bit 31, write a shift count of zero, not 32. */
12923 fprintf (file, "%d", i == 31 ? 0 : i + 1);
12924 return;
12926 case 'T':
12927 /* Print the symbolic name of a branch target register. */
12928 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
12929 x = XVECEXP (x, 0, 0);
12930 if (!REG_P (x) || (REGNO (x) != LR_REGNO
12931 && REGNO (x) != CTR_REGNO))
12932 output_operand_lossage ("invalid %%T value");
12933 else if (REGNO (x) == LR_REGNO)
12934 fputs ("lr", file);
12935 else
12936 fputs ("ctr", file);
12937 return;
12939 case 'u':
12940 /* High-order or low-order 16 bits of constant, whichever is non-zero,
12941 for use in unsigned operand. */
12942 if (! INT_P (x))
12944 output_operand_lossage ("invalid %%u value");
12945 return;
12948 uval = INTVAL (x);
12949 if ((uval & 0xffff) == 0)
12950 uval >>= 16;
12952 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
12953 return;
12955 case 'v':
12956 /* High-order 16 bits of constant for use in signed operand. */
12957 if (! INT_P (x))
12958 output_operand_lossage ("invalid %%v value");
12959 else
12960 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
12961 (INTVAL (x) >> 16) & 0xffff);
12962 return;
12964 case 'U':
12965 /* Print `u' if this has an auto-increment or auto-decrement. */
12966 if (MEM_P (x)
12967 && (GET_CODE (XEXP (x, 0)) == PRE_INC
12968 || GET_CODE (XEXP (x, 0)) == PRE_DEC
12969 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
12970 putc ('u', file);
12971 return;
12973 case 'V':
12974 /* Print the trap code for this operand. */
12975 switch (GET_CODE (x))
12977 case EQ:
12978 fputs ("eq", file); /* 4 */
12979 break;
12980 case NE:
12981 fputs ("ne", file); /* 24 */
12982 break;
12983 case LT:
12984 fputs ("lt", file); /* 16 */
12985 break;
12986 case LE:
12987 fputs ("le", file); /* 20 */
12988 break;
12989 case GT:
12990 fputs ("gt", file); /* 8 */
12991 break;
12992 case GE:
12993 fputs ("ge", file); /* 12 */
12994 break;
12995 case LTU:
12996 fputs ("llt", file); /* 2 */
12997 break;
12998 case LEU:
12999 fputs ("lle", file); /* 6 */
13000 break;
13001 case GTU:
13002 fputs ("lgt", file); /* 1 */
13003 break;
13004 case GEU:
13005 fputs ("lge", file); /* 5 */
13006 break;
13007 default:
13008 output_operand_lossage ("invalid %%V value");
13010 break;
13012 case 'w':
13013 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
13014 normally. */
13015 if (INT_P (x))
13016 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
13017 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
13018 else
13019 print_operand (file, x, 0);
13020 return;
13022 case 'x':
13023 /* X is a FPR or Altivec register used in a VSX context. */
13024 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
13025 output_operand_lossage ("invalid %%x value");
13026 else
13028 int reg = REGNO (x);
13029 int vsx_reg = (FP_REGNO_P (reg)
13030 ? reg - 32
13031 : reg - FIRST_ALTIVEC_REGNO + 32);
13033 #ifdef TARGET_REGNAMES
13034 if (TARGET_REGNAMES)
13035 fprintf (file, "%%vs%d", vsx_reg);
13036 else
13037 #endif
13038 fprintf (file, "%d", vsx_reg);
13040 return;
13042 case 'X':
13043 if (MEM_P (x)
13044 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
13045 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
13046 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
13047 putc ('x', file);
13048 return;
13050 case 'Y':
13051 /* Like 'L', for third word of TImode/PTImode */
13052 if (REG_P (x))
13053 fputs (reg_names[REGNO (x) + 2], file);
13054 else if (MEM_P (x))
13056 machine_mode mode = GET_MODE (x);
13057 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13058 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13059 output_address (mode, plus_constant (Pmode,
13060 XEXP (XEXP (x, 0), 0), 8));
13061 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13062 output_address (mode, plus_constant (Pmode,
13063 XEXP (XEXP (x, 0), 0), 8));
13064 else
13065 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
13066 if (small_data_operand (x, GET_MODE (x)))
13067 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13068 reg_names[SMALL_DATA_REG]);
13070 return;
13072 case 'z':
13073 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13074 x = XVECEXP (x, 0, 1);
13075 /* X is a SYMBOL_REF. Write out the name preceded by a
13076 period and without any trailing data in brackets. Used for function
13077 names. If we are configured for System V (or the embedded ABI) on
13078 the PowerPC, do not emit the period, since those systems do not use
13079 TOCs and the like. */
13080 if (!SYMBOL_REF_P (x))
13082 output_operand_lossage ("invalid %%z value");
13083 return;
13086 /* For macho, check to see if we need a stub. */
13087 if (TARGET_MACHO)
13089 const char *name = XSTR (x, 0);
13090 #if TARGET_MACHO
13091 if (darwin_symbol_stubs
13092 && MACHOPIC_INDIRECT
13093 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13094 name = machopic_indirection_name (x, /*stub_p=*/true);
13095 #endif
13096 assemble_name (file, name);
13098 else if (!DOT_SYMBOLS)
13099 assemble_name (file, XSTR (x, 0));
13100 else
13101 rs6000_output_function_entry (file, XSTR (x, 0));
13102 return;
13104 case 'Z':
13105 /* Like 'L', for last word of TImode/PTImode. */
13106 if (REG_P (x))
13107 fputs (reg_names[REGNO (x) + 3], file);
13108 else if (MEM_P (x))
13110 machine_mode mode = GET_MODE (x);
13111 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13112 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13113 output_address (mode, plus_constant (Pmode,
13114 XEXP (XEXP (x, 0), 0), 12));
13115 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13116 output_address (mode, plus_constant (Pmode,
13117 XEXP (XEXP (x, 0), 0), 12));
13118 else
13119 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
13120 if (small_data_operand (x, GET_MODE (x)))
13121 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13122 reg_names[SMALL_DATA_REG]);
13124 return;
13126 /* Print AltiVec memory operand. */
13127 case 'y':
13129 rtx tmp;
13131 gcc_assert (MEM_P (x));
13133 tmp = XEXP (x, 0);
13135 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
13136 && GET_CODE (tmp) == AND
13137 && CONST_INT_P (XEXP (tmp, 1))
13138 && INTVAL (XEXP (tmp, 1)) == -16)
13139 tmp = XEXP (tmp, 0);
13140 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
13141 && GET_CODE (tmp) == PRE_MODIFY)
13142 tmp = XEXP (tmp, 1);
13143 if (REG_P (tmp))
13144 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
13145 else
13147 if (GET_CODE (tmp) != PLUS
13148 || !REG_P (XEXP (tmp, 0))
13149 || !REG_P (XEXP (tmp, 1)))
13151 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
13152 break;
13155 if (REGNO (XEXP (tmp, 0)) == 0)
13156 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
13157 reg_names[ REGNO (XEXP (tmp, 0)) ]);
13158 else
13159 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
13160 reg_names[ REGNO (XEXP (tmp, 1)) ]);
13162 break;
13165 case 0:
13166 if (REG_P (x))
13167 fprintf (file, "%s", reg_names[REGNO (x)]);
13168 else if (MEM_P (x))
13170 /* We need to handle PRE_INC and PRE_DEC here, since we need to
13171 know the width from the mode. */
13172 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
13173 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
13174 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13175 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
13176 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
13177 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13178 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13179 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
13180 else
13181 output_address (GET_MODE (x), XEXP (x, 0));
13183 else if (toc_relative_expr_p (x, false,
13184 &tocrel_base_oac, &tocrel_offset_oac))
13185 /* This hack along with a corresponding hack in
13186 rs6000_output_addr_const_extra arranges to output addends
13187 where the assembler expects to find them. eg.
13188 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
13189 without this hack would be output as "x@toc+4". We
13190 want "x+4@toc". */
13191 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13192 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
13193 output_addr_const (file, XVECEXP (x, 0, 0));
13194 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13195 output_addr_const (file, XVECEXP (x, 0, 1));
13196 else
13197 output_addr_const (file, x);
13198 return;
13200 case '&':
13201 if (const char *name = get_some_local_dynamic_name ())
13202 assemble_name (file, name);
13203 else
13204 output_operand_lossage ("'%%&' used without any "
13205 "local dynamic TLS references");
13206 return;
13208 default:
13209 output_operand_lossage ("invalid %%xn code");
13213 /* Print the address of an operand. */
13215 void
13216 print_operand_address (FILE *file, rtx x)
13218 if (REG_P (x))
13219 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
13221 /* Is it a PC-relative address? */
13222 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
13224 HOST_WIDE_INT offset;
13226 if (GET_CODE (x) == CONST)
13227 x = XEXP (x, 0);
13229 if (GET_CODE (x) == PLUS)
13231 offset = INTVAL (XEXP (x, 1));
13232 x = XEXP (x, 0);
13234 else
13235 offset = 0;
13237 output_addr_const (file, x);
13239 if (offset)
13240 fprintf (file, "%+" PRId64, offset);
13242 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
13243 fprintf (file, "@got");
13245 fprintf (file, "@pcrel");
13247 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
13248 || GET_CODE (x) == LABEL_REF)
13250 output_addr_const (file, x);
13251 if (small_data_operand (x, GET_MODE (x)))
13252 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13253 reg_names[SMALL_DATA_REG]);
13254 else
13255 gcc_assert (!TARGET_TOC);
13257 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13258 && REG_P (XEXP (x, 1)))
13260 if (REGNO (XEXP (x, 0)) == 0)
13261 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
13262 reg_names[ REGNO (XEXP (x, 0)) ]);
13263 else
13264 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
13265 reg_names[ REGNO (XEXP (x, 1)) ]);
13267 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13268 && CONST_INT_P (XEXP (x, 1)))
13269 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
13270 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
13271 #if TARGET_MACHO
13272 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13273 && CONSTANT_P (XEXP (x, 1)))
13275 fprintf (file, "lo16(");
13276 output_addr_const (file, XEXP (x, 1));
13277 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13279 #endif
13280 #if TARGET_ELF
13281 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13282 && CONSTANT_P (XEXP (x, 1)))
13284 output_addr_const (file, XEXP (x, 1));
13285 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13287 #endif
13288 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
13290 /* This hack along with a corresponding hack in
13291 rs6000_output_addr_const_extra arranges to output addends
13292 where the assembler expects to find them. eg.
13293 (lo_sum (reg 9)
13294 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
13295 without this hack would be output as "x@toc+8@l(9)". We
13296 want "x+8@toc@l(9)". */
13297 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13298 if (GET_CODE (x) == LO_SUM)
13299 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
13300 else
13301 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
13303 else
13304 output_addr_const (file, x);
13307 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13309 bool
13310 rs6000_output_addr_const_extra (FILE *file, rtx x)
13312 if (GET_CODE (x) == UNSPEC)
13313 switch (XINT (x, 1))
13315 case UNSPEC_TOCREL:
13316 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
13317 && REG_P (XVECEXP (x, 0, 1))
13318 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
13319 output_addr_const (file, XVECEXP (x, 0, 0));
13320 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
13322 if (INTVAL (tocrel_offset_oac) >= 0)
13323 fprintf (file, "+");
13324 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
13326 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
13328 putc ('-', file);
13329 assemble_name (file, toc_label_name);
13330 need_toc_init = 1;
13332 else if (TARGET_ELF)
13333 fputs ("@toc", file);
13334 return true;
13336 #if TARGET_MACHO
13337 case UNSPEC_MACHOPIC_OFFSET:
13338 output_addr_const (file, XVECEXP (x, 0, 0));
13339 putc ('-', file);
13340 machopic_output_function_base_name (file);
13341 return true;
13342 #endif
13344 return false;
13347 /* Target hook for assembling integer objects. The PowerPC version has
13348 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
13349 is defined. It also needs to handle DI-mode objects on 64-bit
13350 targets. */
13352 static bool
13353 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
13355 #ifdef RELOCATABLE_NEEDS_FIXUP
13356 /* Special handling for SI values. */
13357 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
13359 static int recurse = 0;
13361 /* For -mrelocatable, we mark all addresses that need to be fixed up in
13362 the .fixup section. Since the TOC section is already relocated, we
13363 don't need to mark it here. We used to skip the text section, but it
13364 should never be valid for relocated addresses to be placed in the text
13365 section. */
13366 if (DEFAULT_ABI == ABI_V4
13367 && (TARGET_RELOCATABLE || flag_pic > 1)
13368 && in_section != toc_section
13369 && !recurse
13370 && !CONST_SCALAR_INT_P (x)
13371 && CONSTANT_P (x))
13373 char buf[256];
13375 recurse = 1;
13376 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
13377 fixuplabelno++;
13378 ASM_OUTPUT_LABEL (asm_out_file, buf);
13379 fprintf (asm_out_file, "\t.long\t(");
13380 output_addr_const (asm_out_file, x);
13381 fprintf (asm_out_file, ")@fixup\n");
13382 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
13383 ASM_OUTPUT_ALIGN (asm_out_file, 2);
13384 fprintf (asm_out_file, "\t.long\t");
13385 assemble_name (asm_out_file, buf);
13386 fprintf (asm_out_file, "\n\t.previous\n");
13387 recurse = 0;
13388 return true;
13390 /* Remove initial .'s to turn a -mcall-aixdesc function
13391 address into the address of the descriptor, not the function
13392 itself. */
13393 else if (SYMBOL_REF_P (x)
13394 && XSTR (x, 0)[0] == '.'
13395 && DEFAULT_ABI == ABI_AIX)
13397 const char *name = XSTR (x, 0);
13398 while (*name == '.')
13399 name++;
13401 fprintf (asm_out_file, "\t.long\t%s\n", name);
13402 return true;
13405 #endif /* RELOCATABLE_NEEDS_FIXUP */
13406 return default_assemble_integer (x, size, aligned_p);
13409 /* Return a template string for assembly to emit when making an
13410 external call. FUNOP is the call mem argument operand number. */
13412 static const char *
13413 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
13415 /* -Wformat-overflow workaround, without which gcc thinks that %u
13416 might produce 10 digits. */
13417 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13419 char arg[12];
13420 arg[0] = 0;
13421 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13423 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13424 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
13425 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13426 sprintf (arg, "(%%&@tlsld)");
13429 /* The magic 32768 offset here corresponds to the offset of
13430 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
13431 char z[11];
13432 sprintf (z, "%%z%u%s", funop,
13433 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
13434 ? "+32768" : ""));
13436 static char str[32]; /* 1 spare */
13437 if (rs6000_pcrel_p (cfun))
13438 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
13439 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13440 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13441 sibcall ? "" : "\n\tnop");
13442 else if (DEFAULT_ABI == ABI_V4)
13443 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13444 flag_pic ? "@plt" : "");
13445 #if TARGET_MACHO
13446 /* If/when we remove the mlongcall opt, we can share the AIX/ELGv2 case. */
13447 else if (DEFAULT_ABI == ABI_DARWIN)
13449 /* The cookie is in operand func+2. */
13450 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
13451 int cookie = INTVAL (operands[funop + 2]);
13452 if (cookie & CALL_LONG)
13454 tree funname = get_identifier (XSTR (operands[funop], 0));
13455 tree labelname = get_prev_label (funname);
13456 gcc_checking_assert (labelname && !sibcall);
13458 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
13459 instruction will reach 'foo', otherwise link as 'bl L42'".
13460 "L42" should be a 'branch island', that will do a far jump to
13461 'foo'. Branch islands are generated in
13462 macho_branch_islands(). */
13463 sprintf (str, "jbsr %%z%u,%.10s", funop,
13464 IDENTIFIER_POINTER (labelname));
13466 else
13467 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
13468 after the call. */
13469 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
13471 #endif
13472 else
13473 gcc_unreachable ();
13474 return str;
13477 const char *
13478 rs6000_call_template (rtx *operands, unsigned int funop)
13480 return rs6000_call_template_1 (operands, funop, false);
13483 const char *
13484 rs6000_sibcall_template (rtx *operands, unsigned int funop)
13486 return rs6000_call_template_1 (operands, funop, true);
13489 /* As above, for indirect calls. */
13491 static const char *
13492 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
13493 bool sibcall)
13495 /* -Wformat-overflow workaround, without which gcc thinks that %u
13496 might produce 10 digits. Note that -Wformat-overflow will not
13497 currently warn here for str[], so do not rely on a warning to
13498 ensure str[] is correctly sized. */
13499 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13501 /* Currently, funop is either 0 or 1. The maximum string is always
13502 a !speculate 64-bit __tls_get_addr call.
13504 ABI_ELFv2, pcrel:
13505 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13506 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
13507 . 9 crset 2\n\t
13508 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13509 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
13510 . 8 beq%T1l-
13511 .---
13512 .142
13514 ABI_AIX:
13515 . 9 ld 2,%3\n\t
13516 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13517 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13518 . 9 crset 2\n\t
13519 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13520 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13521 . 10 beq%T1l-\n\t
13522 . 10 ld 2,%4(1)
13523 .---
13524 .151
13526 ABI_ELFv2:
13527 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13528 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13529 . 9 crset 2\n\t
13530 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13531 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13532 . 10 beq%T1l-\n\t
13533 . 10 ld 2,%3(1)
13534 .---
13535 .142
13537 ABI_V4:
13538 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13539 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
13540 . 9 crset 2\n\t
13541 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13542 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
13543 . 8 beq%T1l-
13544 .---
13545 .141 */
13546 static char str[160]; /* 8 spare */
13547 char *s = str;
13548 const char *ptrload = TARGET_64BIT ? "d" : "wz";
13550 if (DEFAULT_ABI == ABI_AIX)
13551 s += sprintf (s,
13552 "l%s 2,%%%u\n\t",
13553 ptrload, funop + 2);
13555 /* We don't need the extra code to stop indirect call speculation if
13556 calling via LR. */
13557 bool speculate = (TARGET_MACHO
13558 || rs6000_speculate_indirect_jumps
13559 || (REG_P (operands[funop])
13560 && REGNO (operands[funop]) == LR_REGNO));
13562 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
13564 const char *rel64 = TARGET_64BIT ? "64" : "";
13565 char tls[29];
13566 tls[0] = 0;
13567 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13569 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13570 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
13571 rel64, funop + 1);
13572 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13573 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
13574 rel64);
13577 const char *notoc = rs6000_pcrel_p (cfun) ? "_NOTOC" : "";
13578 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13579 && flag_pic == 2 ? "+32768" : "");
13580 if (!speculate)
13582 s += sprintf (s,
13583 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
13584 tls, rel64, notoc, funop, addend);
13585 s += sprintf (s, "crset 2\n\t");
13587 s += sprintf (s,
13588 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
13589 tls, rel64, notoc, funop, addend);
13591 else if (!speculate)
13592 s += sprintf (s, "crset 2\n\t");
13594 if (rs6000_pcrel_p (cfun))
13596 if (speculate)
13597 sprintf (s, "b%%T%ul", funop);
13598 else
13599 sprintf (s, "beq%%T%ul-", funop);
13601 else if (DEFAULT_ABI == ABI_AIX)
13603 if (speculate)
13604 sprintf (s,
13605 "b%%T%ul\n\t"
13606 "l%s 2,%%%u(1)",
13607 funop, ptrload, funop + 3);
13608 else
13609 sprintf (s,
13610 "beq%%T%ul-\n\t"
13611 "l%s 2,%%%u(1)",
13612 funop, ptrload, funop + 3);
13614 else if (DEFAULT_ABI == ABI_ELFv2)
13616 if (speculate)
13617 sprintf (s,
13618 "b%%T%ul\n\t"
13619 "l%s 2,%%%u(1)",
13620 funop, ptrload, funop + 2);
13621 else
13622 sprintf (s,
13623 "beq%%T%ul-\n\t"
13624 "l%s 2,%%%u(1)",
13625 funop, ptrload, funop + 2);
13627 else
13629 if (speculate)
13630 sprintf (s,
13631 "b%%T%u%s",
13632 funop, sibcall ? "" : "l");
13633 else
13634 sprintf (s,
13635 "beq%%T%u%s-%s",
13636 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
13638 return str;
/* Return the asm template for an indirect call (not a sibling call);
   thin wrapper around rs6000_indirect_call_template_1.  */

const char *
rs6000_indirect_call_template (rtx *operands, unsigned int funop)
{
  return rs6000_indirect_call_template_1 (operands, funop, false);
}
/* Return the asm template for an indirect sibling call; thin wrapper
   around rs6000_indirect_call_template_1 with sibcall == true.  */

const char *
rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
{
  return rs6000_indirect_call_template_1 (operands, funop, true);
}
#if HAVE_AS_PLTSEQ
/* Output indirect call insns.  WHICH identifies the type of sequence.  */
const char *
rs6000_pltseq_template (rtx *operands, int which)
{
  /* "64" selects the 64-bit reloc names (R_PPC64_*); otherwise R_PPC_*.  */
  const char *rel64 = TARGET_64BIT ? "64" : "";

  /* Optional TLS marker reloc, emitted before the PLT reloc so the linker
     can recognize a TLS GD/LD call sequence.  */
  char tls[30];
  tls[0] = 0;
  if (GET_CODE (operands[3]) == UNSPEC)
    {
      /* The reloc points back at the insn just emitted: 8 bytes for the
	 prefixed PLT_PCREL34 load, 4 bytes for all other insns.  */
      char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
      if (XINT (operands[3], 1) == UNSPEC_TLSGD)
	sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
		 off, rel64);
      else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
	sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
		 off, rel64);
    }

  gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
  static char str[96];	/* 10 spare */
  /* Distance back from "." to the 16-bit immediate field of the previous
     insn: byte 2 on big-endian, the insn start (4 back) on little-endian.  */
  char off = WORDS_BIG_ENDIAN ? '2' : '4';
  /* Secure-PLT -fPIC (SVR4) biases PLT addresses by 32768.  */
  const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
			&& flag_pic == 2 ? "+32768" : "");
  switch (which)
    {
    case RS6000_PLTSEQ_TOCSAVE:
      /* Store r2 to its stack slot (24(1) for 64-bit, 12(1) for 32-bit).  */
      sprintf (str,
	       "st%s\n\t"
	       "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
	       TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
	       tls, rel64);
      break;
    case RS6000_PLTSEQ_PLT16_HA:
      if (DEFAULT_ABI == ABI_V4 && !flag_pic)
	sprintf (str,
		 "lis %%0,0\n\t"
		 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
		 tls, off, rel64);
      else
	sprintf (str,
		 "addis %%0,%%1,0\n\t"
		 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
		 tls, off, rel64, addend);
      break;
    case RS6000_PLTSEQ_PLT16_LO:
      /* 64-bit uses the DS-form load, hence the _DS reloc variant.  */
      sprintf (str,
	       "l%s %%0,0(%%1)\n\t"
	       "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
	       TARGET_64BIT ? "d" : "wz",
	       tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
      break;
    case RS6000_PLTSEQ_MTCTR:
      sprintf (str,
	       "mtctr %%1\n\t"
	       "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
	       tls, rel64, addend);
      break;
    case RS6000_PLTSEQ_PLT_PCREL34:
      /* Prefixed pc-relative load of the PLT entry (8-byte insn,
	 hence the .-8 reloc offset).  */
      sprintf (str,
	       "pl%s %%0,0(0),1\n\t"
	       "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
	       TARGET_64BIT ? "d" : "wz",
	       tls, rel64);
      break;
    default:
      gcc_unreachable ();
    }
  return str;
}
#endif
#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
/* Emit an assembler directive to set symbol visibility for DECL to
   VISIBILITY_TYPE.  */

static void
rs6000_assemble_visibility (tree decl, int vis)
{
  /* XCOFF has no visibility directives of this form.  */
  if (TARGET_XCOFF)
    return;

  /* Functions need to have their entry point symbol visibility set as
     well as their descriptor symbol visibility.  */
  if (DEFAULT_ABI == ABI_AIX
      && DOT_SYMBOLS
      && TREE_CODE (decl) == FUNCTION_DECL)
    {
      /* Indexed by the VISIBILITY_* enum value; index 0 (default
	 visibility) never reaches the fprintf calls below.  */
      static const char * const visibility_types[] = {
	NULL, "protected", "hidden", "internal"
      };

      const char *name, *type;

      name = ((* targetm.strip_name_encoding)
	      (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
      type = visibility_types[vis];

      /* Descriptor symbol ("name") and entry point symbol (".name").  */
      fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
      fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
    }
  else
    default_assemble_visibility (decl, vis);
}
#endif
13759 enum rtx_code
13760 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
13762 /* Reversal of FP compares takes care -- an ordered compare
13763 becomes an unordered compare and vice versa. */
13764 if (mode == CCFPmode
13765 && (!flag_finite_math_only
13766 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
13767 || code == UNEQ || code == LTGT))
13768 return reverse_condition_maybe_unordered (code);
13769 else
13770 return reverse_condition (code);
13773 /* Generate a compare for CODE. Return a brand-new rtx that
13774 represents the result of the compare. */
13776 static rtx
13777 rs6000_generate_compare (rtx cmp, machine_mode mode)
13779 machine_mode comp_mode;
13780 rtx compare_result;
13781 enum rtx_code code = GET_CODE (cmp);
13782 rtx op0 = XEXP (cmp, 0);
13783 rtx op1 = XEXP (cmp, 1);
13785 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13786 comp_mode = CCmode;
13787 else if (FLOAT_MODE_P (mode))
13788 comp_mode = CCFPmode;
13789 else if (code == GTU || code == LTU
13790 || code == GEU || code == LEU)
13791 comp_mode = CCUNSmode;
13792 else if ((code == EQ || code == NE)
13793 && unsigned_reg_p (op0)
13794 && (unsigned_reg_p (op1)
13795 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
13796 /* These are unsigned values, perhaps there will be a later
13797 ordering compare that can be shared with this one. */
13798 comp_mode = CCUNSmode;
13799 else
13800 comp_mode = CCmode;
13802 /* If we have an unsigned compare, make sure we don't have a signed value as
13803 an immediate. */
13804 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
13805 && INTVAL (op1) < 0)
13807 op0 = copy_rtx_if_shared (op0);
13808 op1 = force_reg (GET_MODE (op0), op1);
13809 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
13812 /* First, the compare. */
13813 compare_result = gen_reg_rtx (comp_mode);
13815 /* IEEE 128-bit support in VSX registers when we do not have hardware
13816 support. */
13817 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13819 rtx libfunc = NULL_RTX;
13820 bool check_nan = false;
13821 rtx dest;
13823 switch (code)
13825 case EQ:
13826 case NE:
13827 libfunc = optab_libfunc (eq_optab, mode);
13828 break;
13830 case GT:
13831 case GE:
13832 libfunc = optab_libfunc (ge_optab, mode);
13833 break;
13835 case LT:
13836 case LE:
13837 libfunc = optab_libfunc (le_optab, mode);
13838 break;
13840 case UNORDERED:
13841 case ORDERED:
13842 libfunc = optab_libfunc (unord_optab, mode);
13843 code = (code == UNORDERED) ? NE : EQ;
13844 break;
13846 case UNGE:
13847 case UNGT:
13848 check_nan = true;
13849 libfunc = optab_libfunc (ge_optab, mode);
13850 code = (code == UNGE) ? GE : GT;
13851 break;
13853 case UNLE:
13854 case UNLT:
13855 check_nan = true;
13856 libfunc = optab_libfunc (le_optab, mode);
13857 code = (code == UNLE) ? LE : LT;
13858 break;
13860 case UNEQ:
13861 case LTGT:
13862 check_nan = true;
13863 libfunc = optab_libfunc (eq_optab, mode);
13864 code = (code = UNEQ) ? EQ : NE;
13865 break;
13867 default:
13868 gcc_unreachable ();
13871 gcc_assert (libfunc);
13873 if (!check_nan)
13874 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
13875 SImode, op0, mode, op1, mode);
13877 /* The library signals an exception for signalling NaNs, so we need to
13878 handle isgreater, etc. by first checking isordered. */
13879 else
13881 rtx ne_rtx, normal_dest, unord_dest;
13882 rtx unord_func = optab_libfunc (unord_optab, mode);
13883 rtx join_label = gen_label_rtx ();
13884 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
13885 rtx unord_cmp = gen_reg_rtx (comp_mode);
13888 /* Test for either value being a NaN. */
13889 gcc_assert (unord_func);
13890 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
13891 SImode, op0, mode, op1, mode);
13893 /* Set value (0) if either value is a NaN, and jump to the join
13894 label. */
13895 dest = gen_reg_rtx (SImode);
13896 emit_move_insn (dest, const1_rtx);
13897 emit_insn (gen_rtx_SET (unord_cmp,
13898 gen_rtx_COMPARE (comp_mode, unord_dest,
13899 const0_rtx)));
13901 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
13902 emit_jump_insn (gen_rtx_SET (pc_rtx,
13903 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
13904 join_ref,
13905 pc_rtx)));
13907 /* Do the normal comparison, knowing that the values are not
13908 NaNs. */
13909 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
13910 SImode, op0, mode, op1, mode);
13912 emit_insn (gen_cstoresi4 (dest,
13913 gen_rtx_fmt_ee (code, SImode, normal_dest,
13914 const0_rtx),
13915 normal_dest, const0_rtx));
13917 /* Join NaN and non-Nan paths. Compare dest against 0. */
13918 emit_label (join_label);
13919 code = NE;
13922 emit_insn (gen_rtx_SET (compare_result,
13923 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
13926 else
13928 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
13929 CLOBBERs to match cmptf_internal2 pattern. */
13930 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
13931 && FLOAT128_IBM_P (GET_MODE (op0))
13932 && TARGET_HARD_FLOAT)
13933 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13934 gen_rtvec (10,
13935 gen_rtx_SET (compare_result,
13936 gen_rtx_COMPARE (comp_mode, op0, op1)),
13937 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13938 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13939 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13940 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13941 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13942 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13943 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13944 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13945 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
13946 else if (GET_CODE (op1) == UNSPEC
13947 && XINT (op1, 1) == UNSPEC_SP_TEST)
13949 rtx op1b = XVECEXP (op1, 0, 0);
13950 comp_mode = CCEQmode;
13951 compare_result = gen_reg_rtx (CCEQmode);
13952 if (TARGET_64BIT)
13953 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
13954 else
13955 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
13957 else
13958 emit_insn (gen_rtx_SET (compare_result,
13959 gen_rtx_COMPARE (comp_mode, op0, op1)));
13962 /* Some kinds of FP comparisons need an OR operation;
13963 under flag_finite_math_only we don't bother. */
13964 if (FLOAT_MODE_P (mode)
13965 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
13966 && !flag_finite_math_only
13967 && (code == LE || code == GE
13968 || code == UNEQ || code == LTGT
13969 || code == UNGT || code == UNLT))
13971 enum rtx_code or1, or2;
13972 rtx or1_rtx, or2_rtx, compare2_rtx;
13973 rtx or_result = gen_reg_rtx (CCEQmode);
13975 switch (code)
13977 case LE: or1 = LT; or2 = EQ; break;
13978 case GE: or1 = GT; or2 = EQ; break;
13979 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
13980 case LTGT: or1 = LT; or2 = GT; break;
13981 case UNGT: or1 = UNORDERED; or2 = GT; break;
13982 case UNLT: or1 = UNORDERED; or2 = LT; break;
13983 default: gcc_unreachable ();
13985 validate_condition_mode (or1, comp_mode);
13986 validate_condition_mode (or2, comp_mode);
13987 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
13988 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
13989 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
13990 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
13991 const_true_rtx);
13992 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
13994 compare_result = or_result;
13995 code = EQ;
13998 validate_condition_mode (code, GET_MODE (compare_result));
14000 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
14004 /* Return the diagnostic message string if the binary operation OP is
14005 not permitted on TYPE1 and TYPE2, NULL otherwise. */
14007 static const char*
14008 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
14009 const_tree type1,
14010 const_tree type2)
14012 machine_mode mode1 = TYPE_MODE (type1);
14013 machine_mode mode2 = TYPE_MODE (type2);
14015 /* For complex modes, use the inner type. */
14016 if (COMPLEX_MODE_P (mode1))
14017 mode1 = GET_MODE_INNER (mode1);
14019 if (COMPLEX_MODE_P (mode2))
14020 mode2 = GET_MODE_INNER (mode2);
14022 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
14023 double to intermix unless -mfloat128-convert. */
14024 if (mode1 == mode2)
14025 return NULL;
14027 if (!TARGET_FLOAT128_CVT)
14029 if ((mode1 == KFmode && mode2 == IFmode)
14030 || (mode1 == IFmode && mode2 == KFmode))
14031 return N_("__float128 and __ibm128 cannot be used in the same "
14032 "expression");
14034 if (TARGET_IEEEQUAD
14035 && ((mode1 == IFmode && mode2 == TFmode)
14036 || (mode1 == TFmode && mode2 == IFmode)))
14037 return N_("__ibm128 and long double cannot be used in the same "
14038 "expression");
14040 if (!TARGET_IEEEQUAD
14041 && ((mode1 == KFmode && mode2 == TFmode)
14042 || (mode1 == TFmode && mode2 == KFmode)))
14043 return N_("__float128 and long double cannot be used in the same "
14044 "expression");
14047 return NULL;
/* Expand floating point conversion to/from __float128 and __ibm128.  */

void
rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
{
  machine_mode dest_mode = GET_MODE (dest);
  machine_mode src_mode = GET_MODE (src);
  convert_optab cvt = unknown_optab;
  bool do_move = false;
  rtx libfunc = NULL_RTX;
  rtx dest2;
  typedef rtx (*rtx_2func_t) (rtx, rtx);
  rtx_2func_t hw_convert = (rtx_2func_t)0;
  size_t kf_or_tf;

  /* Table of hardware conversion insn generators, indexed by whether the
     IEEE 128-bit side of the conversion is KFmode (0) or TFmode (1).  */
  struct hw_conv_t {
    rtx_2func_t	from_df;
    rtx_2func_t from_sf;
    rtx_2func_t from_si_sign;
    rtx_2func_t from_si_uns;
    rtx_2func_t from_di_sign;
    rtx_2func_t from_di_uns;
    rtx_2func_t to_df;
    rtx_2func_t to_sf;
    rtx_2func_t to_si_sign;
    rtx_2func_t to_si_uns;
    rtx_2func_t to_di_sign;
    rtx_2func_t to_di_uns;
  } hw_conversions[2] = {
    /* conversions to/from KFmode */
    {
      gen_extenddfkf2_hw,		/* KFmode <- DFmode.  */
      gen_extendsfkf2_hw,		/* KFmode <- SFmode.  */
      gen_float_kfsi2_hw,		/* KFmode <- SImode (signed).  */
      gen_floatuns_kfsi2_hw,		/* KFmode <- SImode (unsigned).  */
      gen_float_kfdi2_hw,		/* KFmode <- DImode (signed).  */
      gen_floatuns_kfdi2_hw,		/* KFmode <- DImode (unsigned).  */
      gen_trunckfdf2_hw,		/* DFmode <- KFmode.  */
      gen_trunckfsf2_hw,		/* SFmode <- KFmode.  */
      gen_fix_kfsi2_hw,			/* SImode <- KFmode (signed).  */
      gen_fixuns_kfsi2_hw,		/* SImode <- KFmode (unsigned).  */
      gen_fix_kfdi2_hw,			/* DImode <- KFmode (signed).  */
      gen_fixuns_kfdi2_hw,		/* DImode <- KFmode (unsigned).  */
    },

    /* conversions to/from TFmode */
    {
      gen_extenddftf2_hw,		/* TFmode <- DFmode.  */
      gen_extendsftf2_hw,		/* TFmode <- SFmode.  */
      gen_float_tfsi2_hw,		/* TFmode <- SImode (signed).  */
      gen_floatuns_tfsi2_hw,		/* TFmode <- SImode (unsigned).  */
      gen_float_tfdi2_hw,		/* TFmode <- DImode (signed).  */
      gen_floatuns_tfdi2_hw,		/* TFmode <- DImode (unsigned).  */
      gen_trunctfdf2_hw,		/* DFmode <- TFmode.  */
      gen_trunctfsf2_hw,		/* SFmode <- TFmode.  */
      gen_fix_tfsi2_hw,			/* SImode <- TFmode (signed).  */
      gen_fixuns_tfsi2_hw,		/* SImode <- TFmode (unsigned).  */
      gen_fix_tfdi2_hw,			/* DImode <- TFmode (signed).  */
      gen_fixuns_tfdi2_hw,		/* DImode <- TFmode (unsigned).  */
    },
  };

  if (dest_mode == src_mode)
    gcc_unreachable ();

  /* Eliminate memory operations.  */
  if (MEM_P (src))
    src = force_reg (src_mode, src);

  if (MEM_P (dest))
    {
      /* Convert into a fresh register, then store; recursion terminates
	 because the new destination is a REG.  */
      rtx tmp = gen_reg_rtx (dest_mode);
      rs6000_expand_float128_convert (tmp, src, unsigned_p);
      rs6000_emit_move (dest, tmp, dest_mode);
      return;
    }

  /* Convert to IEEE 128-bit floating point.  */
  if (FLOAT128_IEEE_P (dest_mode))
    {
      if (dest_mode == KFmode)
	kf_or_tf = 0;
      else if (dest_mode == TFmode)
	kf_or_tf = 1;
      else
	gcc_unreachable ();

      switch (src_mode)
	{
	case E_DFmode:
	  cvt = sext_optab;
	  hw_convert = hw_conversions[kf_or_tf].from_df;
	  break;

	case E_SFmode:
	  cvt = sext_optab;
	  hw_convert = hw_conversions[kf_or_tf].from_sf;
	  break;

	case E_KFmode:
	case E_IFmode:
	case E_TFmode:
	  /* IBM extended -> IEEE goes through a library extend; two IEEE
	     128-bit modes have identical representation, so just move.  */
	  if (FLOAT128_IBM_P (src_mode))
	    cvt = sext_optab;
	  else
	    do_move = true;
	  break;

	case E_SImode:
	  if (unsigned_p)
	    {
	      cvt = ufloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_si_uns;
	    }
	  else
	    {
	      cvt = sfloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_si_sign;
	    }
	  break;

	case E_DImode:
	  if (unsigned_p)
	    {
	      cvt = ufloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_di_uns;
	    }
	  else
	    {
	      cvt = sfloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_di_sign;
	    }
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Convert from IEEE 128-bit floating point.  */
  else if (FLOAT128_IEEE_P (src_mode))
    {
      if (src_mode == KFmode)
	kf_or_tf = 0;
      else if (src_mode == TFmode)
	kf_or_tf = 1;
      else
	gcc_unreachable ();

      switch (dest_mode)
	{
	case E_DFmode:
	  cvt = trunc_optab;
	  hw_convert = hw_conversions[kf_or_tf].to_df;
	  break;

	case E_SFmode:
	  cvt = trunc_optab;
	  hw_convert = hw_conversions[kf_or_tf].to_sf;
	  break;

	case E_KFmode:
	case E_IFmode:
	case E_TFmode:
	  /* IEEE -> IBM extended truncates via library call; IEEE -> IEEE
	     is a plain move.  */
	  if (FLOAT128_IBM_P (dest_mode))
	    cvt = trunc_optab;
	  else
	    do_move = true;
	  break;

	case E_SImode:
	  if (unsigned_p)
	    {
	      cvt = ufix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_si_uns;
	    }
	  else
	    {
	      cvt = sfix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_si_sign;
	    }
	  break;

	case E_DImode:
	  if (unsigned_p)
	    {
	      cvt = ufix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_di_uns;
	    }
	  else
	    {
	      cvt = sfix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_di_sign;
	    }
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Both IBM format.  */
  else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
    do_move = true;

  else
    gcc_unreachable ();

  /* Handle conversion between TFmode/KFmode/IFmode.  */
  if (do_move)
    emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));

  /* Handle conversion if we have hardware support.  */
  else if (TARGET_FLOAT128_HW && hw_convert)
    emit_insn ((hw_convert) (dest, src));

  /* Call an external function to do the conversion.  */
  else if (cvt != unknown_optab)
    {
      libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
      gcc_assert (libfunc != NULL_RTX);

      dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
				       src, src_mode);

      gcc_assert (dest2 != NULL_RTX);
      if (!rtx_equal_p (dest, dest2))
	emit_move_insn (dest, dest2);
    }

  else
    gcc_unreachable ();

  return;
}
14288 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
14289 can be used as that dest register. Return the dest register. */
14292 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
14294 if (op2 == const0_rtx)
14295 return op1;
14297 if (GET_CODE (scratch) == SCRATCH)
14298 scratch = gen_reg_rtx (mode);
14300 if (logical_operand (op2, mode))
14301 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
14302 else
14303 emit_insn (gen_rtx_SET (scratch,
14304 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
14306 return scratch;
/* Materialize the result of the comparison OPERANDS[1] (with operand mode
   MODE) as 0/1 in the register OPERANDS[0].  */

void
rs6000_emit_sCOND (machine_mode mode, rtx operands[])
{
  rtx condition_rtx;
  machine_mode op_mode;
  enum rtx_code cond_code;
  rtx result = operands[0];

  condition_rtx = rs6000_generate_compare (operands[1], mode);
  cond_code = GET_CODE (condition_rtx);

  /* These condition codes cannot be used directly; compute the reversed
     condition into a CCEQ register and then test that for EQ instead.  */
  if (cond_code == NE
      || cond_code == GE || cond_code == LE
      || cond_code == GEU || cond_code == LEU
      || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
    {
      rtx not_result = gen_reg_rtx (CCEQmode);
      rtx not_op, rev_cond_rtx;
      machine_mode cc_mode;

      cc_mode = GET_MODE (XEXP (condition_rtx, 0));

      rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
				     SImode, XEXP (condition_rtx, 0), const0_rtx);
      not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
      emit_insn (gen_rtx_SET (not_result, not_op));
      condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
    }

  /* Determine the mode the comparison was done in; for a compare against
     a VOIDmode constant, take it from the other operand.  */
  op_mode = GET_MODE (XEXP (operands[1], 0));
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (XEXP (operands[1], 1));

  if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
    {
      PUT_MODE (condition_rtx, DImode);
      convert_move (result, condition_rtx, 0);
    }
  else
    {
      PUT_MODE (condition_rtx, SImode);
      emit_insn (gen_rtx_SET (result, condition_rtx));
    }
}
/* Emit a conditional branch: generate the compare described by OPERANDS[0]
   (operand mode MODE) and jump to the label OPERANDS[3] when it holds.  */

void
rs6000_emit_cbranch (machine_mode mode, rtx operands[])
{
  rtx condition_rtx, loc_ref;

  condition_rtx = rs6000_generate_compare (operands[0], mode);
  loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
  emit_jump_insn (gen_rtx_SET (pc_rtx,
			       gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
						     loc_ref, pc_rtx)));
}
/* Return the string to output a conditional branch to LABEL, which is
   the operand template of the label, or NULL if the branch is really a
   conditional return.

   OP is the conditional expression.  XEXP (OP, 0) is assumed to be a
   condition code register and its mode specifies what kind of
   comparison we made.

   REVERSED is nonzero if we should reverse the sense of the comparison.

   INSN is the insn.  */

char *
output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  rtx cc_reg = XEXP (op, 0);
  machine_mode mode = GET_MODE (cc_reg);
  int cc_regno = REGNO (cc_reg) - CR0_REGNO;
  /* Length 8 means the target is out of conditional-branch range, so we
     emit a reversed conditional branch over an unconditional one.  */
  int need_longbranch = label != NULL && get_attr_length (insn) == 8;
  int really_reversed = reversed ^ need_longbranch;
  char *s = string;
  const char *ccode;
  const char *pred;
  rtx note;

  validate_condition_mode (code, mode);

  /* Work out which way this really branches.  We could use
     reverse_condition_maybe_unordered here always but this
     makes the resulting assembler clearer.  */
  if (really_reversed)
    {
      /* Reversal of FP compares takes care -- an ordered compare
	 becomes an unordered compare and vice versa.  */
      if (mode == CCFPmode)
	code = reverse_condition_maybe_unordered (code);
      else
	code = reverse_condition (code);
    }

  switch (code)
    {
      /* Not all of these are actually distinct opcodes, but
	 we distinguish them for clarity of the resulting assembler.  */
    case NE: case LTGT:
      ccode = "ne"; break;
    case EQ: case UNEQ:
      ccode = "eq"; break;
    case GE: case GEU:
      ccode = "ge"; break;
    case GT: case GTU: case UNGT:
      ccode = "gt"; break;
    case LE: case LEU:
      ccode = "le"; break;
    case LT: case LTU: case UNLT:
      ccode = "lt"; break;
    case UNORDERED: ccode = "un"; break;
    case ORDERED: ccode = "nu"; break;
    case UNGE: ccode = "nl"; break;
    case UNLE: ccode = "ng"; break;
    default:
      gcc_unreachable ();
    }

  /* Maybe we have a guess as to how likely the branch is.  */
  pred = "";
  note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
  if (note != NULL_RTX)
    {
      /* PROB is the difference from 50%.  */
      int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
		   .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;

      /* Only hint for highly probable/improbable branches on newer cpus when
	 we have real profile data, as static prediction overrides processor
	 dynamic prediction.  For older cpus we may as well always hint, but
	 assume not taken for branches that are very close to 50% as a
	 mispredicted taken branch is more expensive than a
	 mispredicted not-taken branch.  */
      if (rs6000_always_hint
	  || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
	      && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
	      && br_prob_note_reliable_p (note)))
	{
	  /* "+" predicts taken, "-" not taken; a long branch inverts the
	     sense because the emitted conditional branch is reversed.  */
	  if (abs (prob) > REG_BR_PROB_BASE / 20
	      && ((prob > 0) ^ need_longbranch))
	    pred = "+";
	  else
	    pred = "-";
	}
    }

  /* No label means a conditional return via the link register.  */
  if (label == NULL)
    s += sprintf (s, "b%slr%s ", ccode, pred);
  else
    s += sprintf (s, "b%s%s ", ccode, pred);

  /* We need to escape any '%' characters in the reg_names string.
     Assume they'd only be the first character....  */
  if (reg_names[cc_regno + CR0_REGNO][0] == '%')
    *s++ = '%';
  s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);

  if (label != NULL)
    {
      /* If the branch distance was too far, we may have to use an
	 unconditional branch to go the distance.  */
      if (need_longbranch)
	s += sprintf (s, ",$+8\n\tb %s", label);
      else
	s += sprintf (s, ",%s", label);
    }

  return string;
}
14486 /* Return insn for VSX or Altivec comparisons. */
14488 static rtx
14489 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
14491 rtx mask;
14492 machine_mode mode = GET_MODE (op0);
14494 switch (code)
14496 default:
14497 break;
14499 case GE:
14500 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14501 return NULL_RTX;
14502 /* FALLTHRU */
14504 case EQ:
14505 case GT:
14506 case GTU:
14507 case ORDERED:
14508 case UNORDERED:
14509 case UNEQ:
14510 case LTGT:
14511 mask = gen_reg_rtx (mode);
14512 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
14513 return mask;
14516 return NULL_RTX;
/* Emit vector compare for operands OP0 and OP1 using code RCODE.
   DMODE is expected destination mode.  This is a recursive function.  */

static rtx
rs6000_emit_vector_compare (enum rtx_code rcode,
			    rtx op0, rtx op1,
			    machine_mode dmode)
{
  rtx mask;
  bool swap_operands = false;
  bool try_again = false;

  gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  /* See if the comparison works as is.  */
  mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
  if (mask)
    return mask;

  switch (rcode)
    {
    /* LT/LTU become GT/GTU with the operands swapped.  */
    case LT:
      rcode = GT;
      swap_operands = true;
      try_again = true;
      break;
    case LTU:
      rcode = GTU;
      swap_operands = true;
      try_again = true;
      break;
    case NE:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
      /* Invert condition and try again.
	 e.g., A != B becomes ~(A==B).  */
      {
	enum rtx_code rev_code;
	enum insn_code nor_code;
	rtx mask2;

	rev_code = reverse_condition_maybe_unordered (rcode);
	if (rev_code == UNKNOWN)
	  return NULL_RTX;

	nor_code = optab_handler (one_cmpl_optab, dmode);
	if (nor_code == CODE_FOR_nothing)
	  return NULL_RTX;

	mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
	if (!mask2)
	  return NULL_RTX;

	mask = gen_reg_rtx (dmode);
	emit_insn (GEN_FCN (nor_code) (mask, mask2));
	return mask;
      }
      break;
    case GE:
    case GEU:
    case LE:
    case LEU:
      /* Try GT/GTU/LT/LTU OR EQ */
      {
	rtx c_rtx, eq_rtx;
	enum insn_code ior_code;
	enum rtx_code new_code;

	switch (rcode)
	  {
	  case GE:
	    new_code = GT;
	    break;

	  case GEU:
	    new_code = GTU;
	    break;

	  case LE:
	    new_code = LT;
	    break;

	  case LEU:
	    new_code = LTU;
	    break;

	  default:
	    gcc_unreachable ();
	  }

	ior_code = optab_handler (ior_optab, dmode);
	if (ior_code == CODE_FOR_nothing)
	  return NULL_RTX;

	/* Recursively build the strict comparison and the equality,
	   then OR the two masks together.  */
	c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
	if (!c_rtx)
	  return NULL_RTX;

	eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
	if (!eq_rtx)
	  return NULL_RTX;

	mask = gen_reg_rtx (dmode);
	emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
	return mask;
      }
      break;
    default:
      return NULL_RTX;
    }

  if (try_again)
    {
      if (swap_operands)
	std::swap (op0, op1);

      mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
      if (mask)
	return mask;
    }

  /* You only get two chances.  */
  return NULL_RTX;
}
/* Emit vector conditional expression.  DEST is destination.  OP_TRUE and
   OP_FALSE are two VEC_COND_EXPR operands.  CC_OP0 and CC_OP1 are the two
   operands for the relation operation COND.  Returns 1 on success, 0 if
   the comparison cannot be synthesized.  */

int
rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
			      rtx cond, rtx cc_op0, rtx cc_op1)
{
  machine_mode dest_mode = GET_MODE (dest);
  machine_mode mask_mode = GET_MODE (cc_op0);
  enum rtx_code rcode = GET_CODE (cond);
  machine_mode cc_mode = CCmode;
  rtx mask;
  rtx cond2;
  bool invert_move = false;

  if (VECTOR_UNIT_NONE_P (dest_mode))
    return 0;

  gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
	      && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));

  switch (rcode)
    {
    /* Swap operands if we can, and fall back to doing the operation as
       specified, and doing a NOR to invert the test.  */
    case NE:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
      /* Invert condition and try again.
	 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D.  */
      invert_move = true;
      rcode = reverse_condition_maybe_unordered (rcode);
      if (rcode == UNKNOWN)
	return 0;
      break;

    case GE:
    case LE:
      if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
	{
	  /* Invert condition to avoid compound test.  */
	  invert_move = true;
	  rcode = reverse_condition (rcode);
	}
      break;

    case GTU:
    case GEU:
    case LTU:
    case LEU:
      /* Mark unsigned tests with CCUNSmode.  */
      cc_mode = CCUNSmode;

      /* Invert condition to avoid compound test if necessary.  */
      if (rcode == GEU || rcode == LEU)
	{
	  invert_move = true;
	  rcode = reverse_condition (rcode);
	}
      break;

    default:
      break;
    }

  /* Get the vector mask for the given relational operations.  */
  mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);

  if (!mask)
    return 0;

  if (invert_move)
    std::swap (op_true, op_false);

  /* Optimize vec1 == vec2, to know the mask generates -1/0.  */
  if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
      && (GET_CODE (op_true) == CONST_VECTOR
	  || GET_CODE (op_false) == CONST_VECTOR))
    {
      /* NOTE(review): these compare rtx pointers, not values -- this
	 presumably relies on CONST0_RTX/CONSTM1_RTX being shared,
	 canonical nodes; verify before changing.  */
      rtx constant_0 = CONST0_RTX (dest_mode);
      rtx constant_m1 = CONSTM1_RTX (dest_mode);

      if (op_true == constant_m1 && op_false == constant_0)
	{
	  emit_move_insn (dest, mask);
	  return 1;
	}

      else if (op_true == constant_0 && op_false == constant_m1)
	{
	  emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
	  return 1;
	}

      /* If we can't use the vector comparison directly, perhaps we can use
	 the mask for the true or false fields, instead of loading up a
	 constant.  */
      if (op_true == constant_m1)
	op_true = mask;

      if (op_false == constant_0)
	op_false = mask;
    }

  if (!REG_P (op_true) && !SUBREG_P (op_true))
    op_true = force_reg (dest_mode, op_true);

  if (!REG_P (op_false) && !SUBREG_P (op_false))
    op_false = force_reg (dest_mode, op_false);

  /* Select between the two arms on mask != 0, element-wise.  */
  cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
			  CONST0_RTX (dest_mode));
  emit_insn (gen_rtx_SET (dest,
			  gen_rtx_IF_THEN_ELSE (dest_mode,
						cond2,
						op_true,
						op_false)));
  return 1;
}
14770 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
14771 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
14772 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
14773 hardware has no such operation. */
14775 static int
14776 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14778 enum rtx_code code = GET_CODE (op);
14779 rtx op0 = XEXP (op, 0);
14780 rtx op1 = XEXP (op, 1);
14781 machine_mode compare_mode = GET_MODE (op0);
14782 machine_mode result_mode = GET_MODE (dest);
14783 bool max_p = false;
/* A min/max insn can only be used when the comparison and the result
   are in the same scalar FP mode.  */
14785 if (result_mode != compare_mode)
14786 return 0;
/* GE/GT select the larger operand, LE/LT the smaller; any other
   comparison code is not a min/max.  */
14788 if (code == GE || code == GT)
14789 max_p = true;
14790 else if (code == LE || code == LT)
14791 max_p = false;
14792 else
14793 return 0;
/* (a OP b ? a : b) maps directly to min/max; (a OP b ? b : a) is the
   opposite operation, so flip MAX_P.  Anything else cannot use min/max.
   NOTE(review): the blob extraction dropped brace/semicolon-only lines
   (e.g. the empty statement on original line 14797), so the first
   branch body is not visible here.  */
14795 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
14798 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
14799 max_p = !max_p;
14801 else
14802 return 0;
14804 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
14805 return 1;
14808 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
14809 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
14810 operands of the last comparison is nonzero/true, FALSE_COND if it is
14811 zero/false. Return 0 if the hardware has no such operation. */
14813 static int
14814 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14816 enum rtx_code code = GET_CODE (op);
14817 rtx op0 = XEXP (op, 0);
14818 rtx op1 = XEXP (op, 1);
14819 machine_mode result_mode = GET_MODE (dest);
14820 rtx compare_rtx;
14821 rtx cmove_rtx;
14822 rtx clobber_rtx;
/* The emitted parallel clobbers a scratch register, so this can only be
   used while new pseudos may still be created (before reload).  */
14824 if (!can_create_pseudo_p ())
14825 return 0;
/* Canonicalize: the compare insns handle EQ/GE/GT directly; for the
   inverse codes, swap the condition and the compare operands.  */
14827 switch (code)
14829 case EQ:
14830 case GE:
14831 case GT:
14832 break;
14834 case NE:
14835 case LT:
14836 case LE:
14837 code = swap_condition (code);
14838 std::swap (op0, op1);
14839 break;
14841 default:
14842 return 0;
14845 /* Generate: [(parallel [(set (dest)
14846 (if_then_else (op (cmp1) (cmp2))
14847 (true)
14848 (false)))
14849 (clobber (scratch))])]. */
14851 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
14852 cmove_rtx = gen_rtx_SET (dest,
14853 gen_rtx_IF_THEN_ELSE (result_mode,
14854 compare_rtx,
14855 true_cond,
14856 false_cond));
14858 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
14859 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14860 gen_rtvec (2, cmove_rtx, clobber_rtx)))
14865 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
14866 operands of the last comparison is nonzero/true, FALSE_COND if it
14867 is zero/false. Return 0 if the hardware has no such operation. */
/* NOTE(review): the blob extraction dropped blank and brace-only lines
   throughout this file, including the "int" return-type line (original
   line 14869) of this function.  The numeric prefixes on each line are
   extraction artifacts, not code.  */
14870 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14872 enum rtx_code code = GET_CODE (op);
14873 rtx op0 = XEXP (op, 0);
14874 rtx op1 = XEXP (op, 1);
14875 machine_mode compare_mode = GET_MODE (op0);
14876 machine_mode result_mode = GET_MODE (dest);
14877 rtx temp;
14878 bool is_against_zero;
14880 /* These modes should always match. */
14881 if (GET_MODE (op1) != compare_mode
14882 /* In the isel case however, we can use a compare immediate, so
14883 op1 may be a small constant. */
14884 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
14885 return 0;
14886 if (GET_MODE (true_cond) != result_mode)
14887 return 0;
14888 if (GET_MODE (false_cond) != result_mode)
14889 return 0;
14891 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
14892 if (TARGET_P9_MINMAX
14893 && (compare_mode == SFmode || compare_mode == DFmode)
14894 && (result_mode == SFmode || result_mode == DFmode))
14896 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
14897 return 1;
14899 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
14900 return 1;
14903 /* Don't allow using floating point comparisons for integer results for
14904 now. */
14905 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
14906 return 0;
14908 /* First, work out if the hardware can do this at all, or
14909 if it's too slow.... */
14910 if (!FLOAT_MODE_P (compare_mode))
14912 if (TARGET_ISEL)
14913 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
14914 return 0;
14917 is_against_zero = op1 == CONST0_RTX (compare_mode);
14919 /* A floating-point subtract might overflow, underflow, or produce
14920 an inexact result, thus changing the floating-point flags, so it
14921 can't be generated if we care about that. It's safe if one side
14922 of the construct is zero, since then no subtract will be
14923 generated. */
14924 if (SCALAR_FLOAT_MODE_P (compare_mode)
14925 && flag_trapping_math && ! is_against_zero)
14926 return 0;
14928 /* Eliminate half of the comparisons by switching operands, this
14929 makes the remaining code simpler. */
14930 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
14931 || code == LTGT || code == LT || code == UNLE)
14933 code = reverse_condition_maybe_unordered (code);
14934 temp = true_cond;
14935 true_cond = false_cond;
14936 false_cond = temp;
14939 /* UNEQ and LTGT take four instructions for a comparison with zero,
14940 it'll probably be faster to use a branch here too. */
14941 if (code == UNEQ && HONOR_NANS (compare_mode))
14942 return 0;
14944 /* We're going to try to implement comparisons by performing
14945 a subtract, then comparing against zero. Unfortunately,
14946 Inf - Inf is NaN which is not zero, and so if we don't
14947 know that the operand is finite and the comparison
14948 would treat EQ different to UNORDERED, we can't do it. */
14949 if (HONOR_INFINITIES (compare_mode)
14950 && code != GT && code != UNGE
14951 && (!CONST_DOUBLE_P (op1)
14952 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
14953 /* Constructs of the form (a OP b ? a : b) are safe. */
14954 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
14955 || (! rtx_equal_p (op0, true_cond)
14956 && ! rtx_equal_p (op1, true_cond))))
14957 return 0;
14959 /* At this point we know we can use fsel. */
14961 /* Reduce the comparison to a comparison against zero. */
14962 if (! is_against_zero)
14964 temp = gen_reg_rtx (compare_mode);
14965 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
14966 op0 = temp;
14967 op1 = CONST0_RTX (compare_mode);
14970 /* If we don't care about NaNs we can reduce some of the comparisons
14971 down to faster ones. */
14972 if (! HONOR_NANS (compare_mode))
14973 switch (code)
14975 case GT:
14976 code = LE;
14977 temp = true_cond;
14978 true_cond = false_cond;
14979 false_cond = temp;
14980 break;
14981 case UNGE:
14982 code = GE;
14983 break;
14984 case UNEQ:
14985 code = EQ;
14986 break;
14987 default:
14988 break;
14991 /* Now, reduce everything down to a GE. */
14992 switch (code)
14994 case GE:
14995 break;
/* a LE 0 <-> -a GE 0: negate the operand.  */
14997 case LE:
14998 temp = gen_reg_rtx (compare_mode);
14999 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15000 op0 = temp;
15001 break;
/* a ORDERED 0 <-> |a| GE 0 (false only for NaN).  */
15003 case ORDERED:
15004 temp = gen_reg_rtx (compare_mode);
15005 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
15006 op0 = temp;
15007 break;
/* a EQ 0 <-> -|a| GE 0 (true only for zero).  */
15009 case EQ:
15010 temp = gen_reg_rtx (compare_mode);
15011 emit_insn (gen_rtx_SET (temp,
15012 gen_rtx_NEG (compare_mode,
15013 gen_rtx_ABS (compare_mode, op0))));
15014 op0 = temp;
15015 break;
15017 case UNGE:
15018 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
15019 temp = gen_reg_rtx (result_mode);
15020 emit_insn (gen_rtx_SET (temp,
15021 gen_rtx_IF_THEN_ELSE (result_mode,
15022 gen_rtx_GE (VOIDmode,
15023 op0, op1),
15024 true_cond, false_cond)));
15025 false_cond = true_cond;
15026 true_cond = temp;
15028 temp = gen_reg_rtx (compare_mode);
15029 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15030 op0 = temp;
15031 break;
15033 case GT:
15034 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
15035 temp = gen_reg_rtx (result_mode);
15036 emit_insn (gen_rtx_SET (temp,
15037 gen_rtx_IF_THEN_ELSE (result_mode,
15038 gen_rtx_GE (VOIDmode,
15039 op0, op1),
15040 true_cond, false_cond)));
15041 true_cond = false_cond;
15042 false_cond = temp;
15044 temp = gen_reg_rtx (compare_mode);
15045 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15046 op0 = temp;
15047 break;
15049 default:
15050 gcc_unreachable ();
/* Emit the final fsel-style conditional move on (op0 GE 0).  */
15053 emit_insn (gen_rtx_SET (dest,
15054 gen_rtx_IF_THEN_ELSE (result_mode,
15055 gen_rtx_GE (VOIDmode,
15056 op0, op1),
15057 true_cond, false_cond)));
15058 return 1;
15061 /* Same as above, but for ints (isel). */
/* NOTE(review): the "int" return-type line (original line 15063) was
   dropped by the blob extraction.  */
15064 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15066 rtx condition_rtx, cr;
15067 machine_mode mode = GET_MODE (dest);
15068 enum rtx_code cond_code;
15069 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
15070 bool signedp;
/* isel is only available for SImode, and DImode on 64-bit.  */
15072 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
15073 return 0;
15075 /* We still have to do the compare, because isel doesn't do a
15076 compare, it just looks at the CRx bits set by a previous compare
15077 instruction. */
15078 condition_rtx = rs6000_generate_compare (op, mode);
15079 cond_code = GET_CODE (condition_rtx);
15080 cr = XEXP (condition_rtx, 0);
/* CCmode means a signed compare was generated; CCUNSmode unsigned.  */
15081 signedp = GET_MODE (cr) == CCmode;
15083 isel_func = (mode == SImode
15084 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
15085 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
15087 switch (cond_code)
15089 case LT: case GT: case LTU: case GTU: case EQ:
15090 /* isel handles these directly. */
15091 break;
15093 default:
15094 /* We need to swap the sense of the comparison. */
15096 std::swap (false_cond, true_cond);
15097 PUT_CODE (condition_rtx, reverse_condition (cond_code));
15099 break;
/* isel requires register operands (true may be the literal 0).  */
15102 false_cond = force_reg (mode, false_cond);
15103 if (true_cond != const0_rtx)
15104 true_cond = force_reg (mode, true_cond);
15106 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
15108 return 1;
/* Emit code to set DEST to the min or max (per CODE: SMIN/SMAX/UMIN/UMAX)
   of OP0 and OP1.  Uses direct vector/VSX min-max insns when available,
   otherwise falls back to a conditional move.  */
15111 void
15112 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
15114 machine_mode mode = GET_MODE (op0);
15115 enum rtx_code c;
15116 rtx target;
15118 /* VSX/altivec have direct min/max insns. */
15119 if ((code == SMAX || code == SMIN)
15120 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
15121 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
15123 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
15124 return;
/* Fall back: express min/max as a conditional move on GE (signed) or
   GEU (unsigned); max selects op0 on GE, min selects op1.  */
15127 if (code == SMAX || code == SMIN)
15128 c = GE;
15129 else
15130 c = GEU;
15132 if (code == SMAX || code == UMAX)
15133 target = emit_conditional_move (dest, c, op0, op1, mode,
15134 op0, op1, mode, 0);
15135 else
15136 target = emit_conditional_move (dest, c, op0, op1, mode,
15137 op1, op0, mode, 0);
15138 gcc_assert (target);
15139 if (target != dest)
15140 emit_move_insn (dest, target);
15143 /* A subroutine of the atomic operation splitters. Jump to LABEL if
15144 COND is true. Mark the jump as unlikely to be taken. */
15146 static void
15147 emit_unlikely_jump (rtx cond, rtx label)
15149 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
15150 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
/* The probability note keeps the retry branch off the hot path when
   basic blocks are laid out.  */
15151 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
15154 /* A subroutine of the atomic operation splitters. Emit a load-locked
15155 instruction in MODE. For QI/HImode, possibly use a pattern that includes
15156 the zero_extend operation. */
15158 static void
15159 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
15161 rtx (*fn) (rtx, rtx) = NULL;
15163 switch (mode)
15165 case E_QImode:
15166 fn = gen_load_lockedqi;
15167 break;
15168 case E_HImode:
15169 fn = gen_load_lockedhi;
15170 break;
15171 case E_SImode:
/* MODE may be SImode while MEM is QI/HImode: that is the power8
   lbarx/lharx-with-zero-extend case.  */
15172 if (GET_MODE (mem) == QImode)
15173 fn = gen_load_lockedqi_si;
15174 else if (GET_MODE (mem) == HImode)
15175 fn = gen_load_lockedhi_si;
15176 else
15177 fn = gen_load_lockedsi;
15178 break;
15179 case E_DImode:
15180 fn = gen_load_lockeddi;
15181 break;
15182 case E_TImode:
15183 fn = gen_load_lockedti;
15184 break;
15185 default:
15186 gcc_unreachable ();
15188 emit_insn (fn (reg, mem));
15191 /* A subroutine of the atomic operation splitters. Emit a store-conditional
15192 instruction in MODE. RES receives the CR result of the stwcx.-style
15193 store; MEM and VAL are the destination and value. */
15194 static void
15195 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
15197 rtx (*fn) (rtx, rtx, rtx) = NULL;
15199 switch (mode)
15201 case E_QImode:
15202 fn = gen_store_conditionalqi;
15203 break;
15204 case E_HImode:
15205 fn = gen_store_conditionalhi;
15206 break;
15207 case E_SImode:
15208 fn = gen_store_conditionalsi;
15209 break;
15210 case E_DImode:
15211 fn = gen_store_conditionaldi;
15212 break;
15213 case E_TImode:
15214 fn = gen_store_conditionalti;
15215 break;
15216 default:
15217 gcc_unreachable ();
15220 /* Emit sync before stwcx. to address PPC405 Erratum. */
15221 if (PPC405_ERRATUM77)
15222 emit_insn (gen_hwsync ());
15224 emit_insn (fn (res, mem, val));
15227 /* Expand barriers before and after a load_locked/store_cond sequence. */
/* Emit the leading barrier required by memory-model MODEL and return MEM,
   possibly rewritten so its address is valid for the l[bhwd]arx insns
   (indirect or indexed addressing only).  */
15229 static rtx
15230 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
15232 rtx addr = XEXP (mem, 0);
15234 if (!legitimate_indirect_address_p (addr, reload_completed)
15235 && !legitimate_indexed_address_p (addr, reload_completed))
15237 addr = force_reg (Pmode, addr);
15238 mem = replace_equiv_address_nv (mem, addr);
/* Release semantics need lwsync before the operation; seq-cst needs the
   full hwsync.  Acquire-only orderings need nothing up front.  */
15241 switch (model)
15243 case MEMMODEL_RELAXED:
15244 case MEMMODEL_CONSUME:
15245 case MEMMODEL_ACQUIRE:
15246 break;
15247 case MEMMODEL_RELEASE:
15248 case MEMMODEL_ACQ_REL:
15249 emit_insn (gen_lwsync ());
15250 break;
15251 case MEMMODEL_SEQ_CST:
15252 emit_insn (gen_hwsync ());
15253 break;
15254 default:
15255 gcc_unreachable ();
15257 return mem;
/* Emit the trailing barrier required by memory-model MODEL after a
   load_locked/store_cond sequence: acquire-class orderings need isync.  */
15260 static void
15261 rs6000_post_atomic_barrier (enum memmodel model)
15263 switch (model)
15265 case MEMMODEL_RELAXED:
15266 case MEMMODEL_CONSUME:
15267 case MEMMODEL_RELEASE:
15268 break;
15269 case MEMMODEL_ACQUIRE:
15270 case MEMMODEL_ACQ_REL:
15271 case MEMMODEL_SEQ_CST:
15272 emit_insn (gen_isync ());
15273 break;
15274 default:
15275 gcc_unreachable ();
15279 /* A subroutine of the various atomic expanders. For sub-word operations,
15280 we must adjust things to operate on SImode. Given the original MEM,
15281 return a new aligned memory. Also build and return the quantities by
15282 which to shift and mask. */
15284 static rtx
15285 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
15287 rtx addr, align, shift, mask, mem;
15288 HOST_WIDE_INT shift_mask;
15289 machine_mode mode = GET_MODE (orig_mem);
15291 /* For smaller modes, we have to implement this via SImode. */
/* 0x18/0x10 are the maximum bit offsets of a byte/halfword in a word.  */
15292 shift_mask = (mode == QImode ? 0x18 : 0x10);
15294 addr = XEXP (orig_mem, 0);
15295 addr = force_reg (GET_MODE (addr), addr);
15297 /* Aligned memory containing subword. Generate a new memory. We
15298 do not want any of the existing MEM_ATTR data, as we're now
15299 accessing memory outside the original object. */
15300 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
15301 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15302 mem = gen_rtx_MEM (SImode, align);
15303 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
15304 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
15305 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
15307 /* Shift amount for subword relative to aligned word. */
15308 shift = gen_reg_rtx (SImode);
15309 addr = gen_lowpart (SImode, addr);
15310 rtx tmp = gen_reg_rtx (SImode);
/* shift = (addr * 8) & shift_mask: bit offset of the subword.  */
15311 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
15312 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)))
/* On big-endian the subword order within the word is reversed.  */
15313 if (BYTES_BIG_ENDIAN)
15314 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
15315 shift, 1, OPTAB_LIB_WIDEN);
15316 *pshift = shift;
15318 /* Mask for insertion. */
15319 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
15320 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
15321 *pmask = mask;
15323 return mem;
15326 /* A subroutine of the various atomic expanders. For sub-word operands,
15327 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
15329 static rtx
15330 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
15332 rtx x;
/* Result is (oldval & ~mask) | newval: keep the bytes outside the
   subword, insert the (already shifted) new subword bits.  */
15334 x = gen_reg_rtx (SImode);
15335 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
15336 gen_rtx_NOT (SImode, mask),
15337 oldval)));
15339 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
15341 return x;
15344 /* A subroutine of the various atomic expanders. For sub-word operands,
15345 extract WIDE to NARROW via SHIFT. */
15347 static void
15348 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
/* Shift the subword down to bit 0 of the SImode value, then take the
   low part in NARROW's mode.  */
15350 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
15351 wide, 1, OPTAB_LIB_WIDEN);
15352 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
15355 /* Expand an atomic compare and swap operation.
   operands[0] = bool success output; [1] = old-value output; [2] = memory;
   [3] = expected value; [4] = desired value; [5] = weak flag;
   [6]/[7] = success/failure memory models. */
15357 void
15358 rs6000_expand_atomic_compare_and_swap (rtx operands[])
15360 rtx boolval, retval, mem, oldval, newval, cond;
15361 rtx label1, label2, x, mask, shift;
15362 machine_mode mode, orig_mode;
15363 enum memmodel mod_s, mod_f;
15364 bool is_weak;
15366 boolval = operands[0];
15367 retval = operands[1];
15368 mem = operands[2];
15369 oldval = operands[3];
15370 newval = operands[4];
15371 is_weak = (INTVAL (operands[5]) != 0);
15372 mod_s = memmodel_base (INTVAL (operands[6]));
15373 mod_f = memmodel_base (INTVAL (operands[7]));
15374 orig_mode = mode = GET_MODE (mem);
15376 mask = shift = NULL_RTX;
15377 if (mode == QImode || mode == HImode)
15379 /* Before power8, we didn't have access to lbarx/lharx, so generate a
15380 lwarx and shift/mask operations. With power8, we need to do the
15381 comparison in SImode, but the store is still done in QI/HImode. */
15382 oldval = convert_modes (SImode, mode, oldval, 1);
15384 if (!TARGET_SYNC_HI_QI)
15386 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15388 /* Shift and mask OLDVAL into position within the word. */
15389 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
15390 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15392 /* Shift and mask NEWVAL into position within the word. */
15393 newval = convert_modes (SImode, mode, newval, 1);
15394 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
15395 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15398 /* Prepare to adjust the return value. */
15399 retval = gen_reg_rtx (SImode);
15400 mode = SImode;
/* RETVAL is written inside the retry loop, so copy inputs it might
   clobber.  */
15402 else if (reg_overlap_mentioned_p (retval, oldval))
15403 oldval = copy_to_reg (oldval);
15405 if (mode != TImode && !reg_or_short_operand (oldval, mode))
15406 oldval = copy_to_mode_reg (mode, oldval);
15408 if (reg_overlap_mentioned_p (retval, newval))
15409 newval = copy_to_reg (newval);
15411 mem = rs6000_pre_atomic_barrier (mem, mod_s);
15413 label1 = NULL_RTX;
/* A strong CAS retries the store-conditional; weak does a single try.  */
15414 if (!is_weak)
15416 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15417 emit_label (XEXP (label1, 0));
15419 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15421 emit_load_locked (mode, retval, mem);
15423 x = retval;
15424 if (mask)
15425 x = expand_simple_binop (SImode, AND, retval, mask,
15426 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15428 cond = gen_reg_rtx (CCmode);
15429 /* If we have TImode, synthesize a comparison. */
15430 if (mode != TImode)
15431 x = gen_rtx_COMPARE (CCmode, x, oldval);
15432 else
/* TImode: compare the two 64-bit halves with XOR/XOR/OR and test the
   result against zero.  */
15434 rtx xor1_result = gen_reg_rtx (DImode);
15435 rtx xor2_result = gen_reg_rtx (DImode);
15436 rtx or_result = gen_reg_rtx (DImode);
15437 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
15438 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
15439 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
15440 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
15442 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
15443 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
15444 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
15445 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
15448 emit_insn (gen_rtx_SET (cond, x));
15450 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15451 emit_unlikely_jump (x, label2);
15453 x = newval;
15454 if (mask)
15455 x = rs6000_mask_atomic_subword (retval, newval, mask);
15457 emit_store_conditional (orig_mode, cond, mem, x);
15459 if (!is_weak)
15461 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15462 emit_unlikely_jump (x, label1);
/* For relaxed failure ordering, the failure exit may skip the trailing
   barrier; otherwise the barrier covers both paths.  */
15465 if (!is_mm_relaxed (mod_f))
15466 emit_label (XEXP (label2, 0));
15468 rs6000_post_atomic_barrier (mod_s);
15470 if (is_mm_relaxed (mod_f))
15471 emit_label (XEXP (label2, 0));
15473 if (shift)
15474 rs6000_finish_atomic_subword (operands[1], retval, shift);
15475 else if (mode != GET_MODE (operands[1]))
15476 convert_move (operands[1], retval, 1);
15478 /* In all cases, CR0 contains EQ on success, and NE on failure. */
15479 x = gen_rtx_EQ (SImode, cond, const0_rtx);
15480 emit_insn (gen_rtx_SET (boolval, x));
15483 /* Expand an atomic exchange operation.
   operands[0] = old-value output; [1] = memory; [2] = new value;
   [3] = memory model. */
15485 void
15486 rs6000_expand_atomic_exchange (rtx operands[])
15488 rtx retval, mem, val, cond;
15489 machine_mode mode;
15490 enum memmodel model;
15491 rtx label, x, mask, shift;
15493 retval = operands[0];
15494 mem = operands[1];
15495 val = operands[2];
15496 model = memmodel_base (INTVAL (operands[3]));
15497 mode = GET_MODE (mem);
15499 mask = shift = NULL_RTX;
/* Without lbarx/lharx, implement sub-word exchange on the containing
   aligned word.  */
15500 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
15502 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15504 /* Shift and mask VAL into position within the word. */
15505 val = convert_modes (SImode, mode, val, 1);
15506 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15507 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15509 /* Prepare to adjust the return value. */
15510 retval = gen_reg_rtx (SImode);
15511 mode = SImode;
15514 mem = rs6000_pre_atomic_barrier (mem, model);
/* Retry loop: load-locked, merge, store-conditional, branch on fail.  */
15516 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15517 emit_label (XEXP (label, 0));
15519 emit_load_locked (mode, retval, mem);
15521 x = val;
15522 if (mask)
15523 x = rs6000_mask_atomic_subword (retval, val, mask);
15525 cond = gen_reg_rtx (CCmode);
15526 emit_store_conditional (mode, cond, mem, x);
15528 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15529 emit_unlikely_jump (x, label);
15531 rs6000_post_atomic_barrier (model);
15533 if (shift)
15534 rs6000_finish_atomic_subword (operands[0], retval, shift);
15537 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
15538 to perform. MEM is the memory on which to operate. VAL is the second
15539 operand of the binary operator. BEFORE and AFTER are optional locations to
15540 return the value of MEM either before of after the operation. MODEL_RTX
15541 is a CONST_INT containing the memory model to use. */
15543 void
15544 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
15545 rtx orig_before, rtx orig_after, rtx model_rtx)
15547 enum memmodel model = memmodel_base (INTVAL (model_rtx));
15548 machine_mode mode = GET_MODE (mem);
15549 machine_mode store_mode = mode;
15550 rtx label, x, cond, mask, shift;
15551 rtx before = orig_before, after = orig_after;
15553 mask = shift = NULL_RTX;
15554 /* On power8, we want to use SImode for the operation. On previous systems,
15555 use the operation in a subword and shift/mask to get the proper byte or
15556 halfword. */
15557 if (mode == QImode || mode == HImode)
15559 if (TARGET_SYNC_HI_QI)
15561 val = convert_modes (SImode, mode, val, 1);
15563 /* Prepare to adjust the return value. */
15564 before = gen_reg_rtx (SImode);
15565 if (after)
15566 after = gen_reg_rtx (SImode);
15567 mode = SImode;
15569 else
15571 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15573 /* Shift and mask VAL into position within the word. */
15574 val = convert_modes (SImode, mode, val, 1);
15575 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15576 NULL_RTX, 1, OPTAB_LIB_WIDEN);
/* Decide whether the loop must re-mask the result: IOR/XOR/AND can be
   made harmless to the bytes outside the subword up front.  */
15578 switch (code)
15580 case IOR:
15581 case XOR:
15582 /* We've already zero-extended VAL. That is sufficient to
15583 make certain that it does not affect other bits. */
15584 mask = NULL;
15585 break;
15587 case AND:
15588 /* If we make certain that all of the other bits in VAL are
15589 set, that will be sufficient to not affect other bits. */
15590 x = gen_rtx_NOT (SImode, mask);
15591 x = gen_rtx_IOR (SImode, x, val);
15592 emit_insn (gen_rtx_SET (val, x));
15593 mask = NULL;
15594 break;
15596 case NOT:
15597 case PLUS:
15598 case MINUS:
15599 /* These will all affect bits outside the field and need
15600 adjustment via MASK within the loop. */
15601 break;
15603 default:
15604 gcc_unreachable ();
15607 /* Prepare to adjust the return value. */
15608 before = gen_reg_rtx (SImode);
15609 if (after)
15610 after = gen_reg_rtx (SImode);
15611 store_mode = mode = SImode;
15615 mem = rs6000_pre_atomic_barrier (mem, model);
15617 label = gen_label_rtx ();
15618 emit_label (label);
15619 label = gen_rtx_LABEL_REF (VOIDmode, label);
15621 if (before == NULL_RTX)
15622 before = gen_reg_rtx (mode);
15624 emit_load_locked (mode, before, mem);
/* NOT here means NAND (fetch-and-nand): after = ~(before & val).  */
15626 if (code == NOT)
15628 x = expand_simple_binop (mode, AND, before, val,
15629 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15630 after = expand_simple_unop (mode, NOT, x, after, 1);
15632 else
15634 after = expand_simple_binop (mode, code, before, val,
15635 after, 1, OPTAB_LIB_WIDEN);
15638 x = after;
15639 if (mask)
15641 x = expand_simple_binop (SImode, AND, after, mask,
15642 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15643 x = rs6000_mask_atomic_subword (before, x, mask);
15645 else if (store_mode != mode)
15646 x = convert_modes (store_mode, mode, x, 1);
15648 cond = gen_reg_rtx (CCmode);
15649 emit_store_conditional (store_mode, cond, mem, x);
15651 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15652 emit_unlikely_jump (x, label);
15654 rs6000_post_atomic_barrier (model);
15656 if (shift)
15658 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
15659 then do the calculations in a SImode register. */
15660 if (orig_before)
15661 rs6000_finish_atomic_subword (orig_before, before, shift);
15662 if (orig_after)
15663 rs6000_finish_atomic_subword (orig_after, after, shift);
15665 else if (store_mode != mode)
15667 /* QImode/HImode on machines with lbarx/lharx where we do the native
15668 operation and then do the calculations in a SImode register. */
15669 if (orig_before)
15670 convert_move (orig_before, before, 1);
15671 if (orig_after)
15672 convert_move (orig_after, after, 1);
15674 else if (orig_after && after != orig_after)
15675 emit_move_insn (orig_after, after);
15678 /* Emit instructions to move SRC to DST. Called by splitters for
15679 multi-register moves. It will emit at most one instruction for
15680 each register that is accessed; that is, it won't emit li/lis pairs
15681 (or equivalent for 64-bit code). One of SRC or DST must be a hard
15682 register. */
15684 void
15685 rs6000_split_multireg_move (rtx dst, rtx src)
15687 /* The register number of the first register being moved. */
15688 int reg;
15689 /* The mode that is to be moved. */
15690 machine_mode mode;
15691 /* The mode that the move is being done in, and its size. */
15692 machine_mode reg_mode;
15693 int reg_mode_size;
15694 /* The number of registers that will be moved. */
15695 int nregs;
15697 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
15698 mode = GET_MODE (dst);
15699 nregs = hard_regno_nregs (reg, mode);
/* Pick the per-register move mode from the register class of REG.  */
15700 if (FP_REGNO_P (reg))
15701 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
15702 (TARGET_HARD_FLOAT ? DFmode : SFmode);
15703 else if (ALTIVEC_REGNO_P (reg))
15704 reg_mode = V16QImode;
15705 else
15706 reg_mode = word_mode;
15707 reg_mode_size = GET_MODE_SIZE (reg_mode);
15709 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
15711 /* TDmode residing in FP registers is special, since the ISA requires that
15712 the lower-numbered word of a register pair is always the most significant
15713 word, even in little-endian mode. This does not match the usual subreg
15714 semantics, so we cannot use simplify_gen_subreg in those cases. Access
15715 the appropriate constituent registers "by hand" in little-endian mode.
15717 Note we do not need to check for destructive overlap here since TDmode
15718 can only reside in even/odd register pairs. */
15719 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
15721 rtx p_src, p_dst;
15722 int i;
15724 for (i = 0; i < nregs; i++)
15726 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
15727 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
15728 else
15729 p_src = simplify_gen_subreg (reg_mode, src, mode,
15730 i * reg_mode_size);
15732 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
15733 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
15734 else
15735 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
15736 i * reg_mode_size);
15738 emit_insn (gen_rtx_SET (p_dst, p_src));
15741 return;
15744 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
15746 /* Move register range backwards, if we might have destructive
15747 overlap. */
15748 int i;
15749 for (i = nregs - 1; i >= 0; i--)
15750 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
15751 i * reg_mode_size),
15752 simplify_gen_subreg (reg_mode, src, mode,
15753 i * reg_mode_size)));
15755 else
15757 int i;
15758 int j = -1;
15759 bool used_update = false;
15760 rtx restore_basereg = NULL_RTX;
/* Load from memory into GPRs: legitimize auto-inc / non-offsettable
   addresses first, since each subword needs its own offset.  */
15762 if (MEM_P (src) && INT_REGNO_P (reg))
15764 rtx breg;
15766 if (GET_CODE (XEXP (src, 0)) == PRE_INC
15767 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
15769 rtx delta_rtx;
15770 breg = XEXP (XEXP (src, 0), 0);
15771 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
15772 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
15773 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
15774 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
15775 src = replace_equiv_address (src, breg);
15777 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
15779 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
15781 rtx basereg = XEXP (XEXP (src, 0), 0);
15782 if (TARGET_UPDATE)
/* Fold the first subword load into a load-with-update.  */
15784 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
15785 emit_insn (gen_rtx_SET (ndst,
15786 gen_rtx_MEM (reg_mode,
15787 XEXP (src, 0))));
15788 used_update = true;
15790 else
15791 emit_insn (gen_rtx_SET (basereg,
15792 XEXP (XEXP (src, 0), 1)));
15793 src = replace_equiv_address (src, basereg);
15795 else
/* Compute the address into the first destination register; it is
   overwritten last (see J below).  */
15797 rtx basereg = gen_rtx_REG (Pmode, reg);
15798 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
15799 src = replace_equiv_address (src, basereg);
15803 breg = XEXP (src, 0);
15804 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
15805 breg = XEXP (breg, 0);
15807 /* If the base register we are using to address memory is
15808 also a destination reg, then change that register last. */
15809 if (REG_P (breg)
15810 && REGNO (breg) >= REGNO (dst)
15811 && REGNO (breg) < REGNO (dst) + nregs)
15812 j = REGNO (breg) - REGNO (dst);
/* Store from GPRs to memory: same address legitimization, mirrored.  */
15814 else if (MEM_P (dst) && INT_REGNO_P (reg))
15816 rtx breg;
15818 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
15819 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
15821 rtx delta_rtx;
15822 breg = XEXP (XEXP (dst, 0), 0);
15823 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
15824 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
15825 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
15827 /* We have to update the breg before doing the store.
15828 Use store with update, if available. */
15830 if (TARGET_UPDATE)
15832 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
15833 emit_insn (TARGET_32BIT
15834 ? (TARGET_POWERPC64
15835 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
15836 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
15837 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
15838 used_update = true;
15840 else
15841 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
15842 dst = replace_equiv_address (dst, breg);
15844 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
15845 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
15847 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
15849 rtx basereg = XEXP (XEXP (dst, 0), 0);
15850 if (TARGET_UPDATE)
15852 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
15853 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
15854 XEXP (dst, 0)),
15855 nsrc));
15856 used_update = true;
15858 else
15859 emit_insn (gen_rtx_SET (basereg,
15860 XEXP (XEXP (dst, 0), 1)));
15861 dst = replace_equiv_address (dst, basereg);
15863 else
/* reg+reg address: fold the offset into the base register for the
   duration of the move, and undo it afterwards (RESTORE_BASEREG).  */
15865 rtx basereg = XEXP (XEXP (dst, 0), 0);
15866 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
15867 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
15868 && REG_P (basereg)
15869 && REG_P (offsetreg)
15870 && REGNO (basereg) != REGNO (offsetreg));
/* r0 reads as zero in the base position of D-form addressing, so
   make sure the base is not r0.  */
15871 if (REGNO (basereg) == 0)
15873 rtx tmp = offsetreg;
15874 offsetreg = basereg;
15875 basereg = tmp;
15877 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
15878 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
15879 dst = replace_equiv_address (dst, basereg);
15882 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
15883 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
/* Emit the subword moves, starting after index J so a base register
   that is also a destination is overwritten last.  */
15886 for (i = 0; i < nregs; i++)
15888 /* Calculate index to next subword. */
15889 ++j;
15890 if (j == nregs)
15891 j = 0;
15893 /* If compiler already emitted move of first word by
15894 store with update, no need to do anything. */
15895 if (j == 0 && used_update)
15896 continue;
15898 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
15899 j * reg_mode_size),
15900 simplify_gen_subreg (reg_mode, src, mode,
15901 j * reg_mode_size)));
15903 if (restore_basereg != NULL_RTX)
15904 emit_insn (restore_basereg);
15908 static GTY(()) alias_set_type TOC_alias_set = -1;
15910 alias_set_type
15911 get_TOC_alias_set (void)
15913 if (TOC_alias_set == -1)
15914 TOC_alias_set = new_alias_set ();
15915 return TOC_alias_set;
15918 /* The mode the ABI uses for a word. This is not the same as word_mode
15919 for -m32 -mpowerpc64. This is used to implement various target hooks. */
15921 static scalar_int_mode
15922 rs6000_abi_word_mode (void)
15924 return TARGET_32BIT ? SImode : DImode;
15927 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
15928 static char *
15929 rs6000_offload_options (void)
15931 if (TARGET_64BIT)
15932 return xstrdup ("-foffload-abi=lp64");
15933 else
15934 return xstrdup ("-foffload-abi=ilp32");
15938 /* A quick summary of the various types of 'constant-pool tables'
15939 under PowerPC:
15941 Target Flags Name One table per
15942 AIX (none) AIX TOC object file
15943 AIX -mfull-toc AIX TOC object file
15944 AIX -mminimal-toc AIX minimal TOC translation unit
15945 SVR4/EABI (none) SVR4 SDATA object file
15946 SVR4/EABI -fpic SVR4 pic object file
15947 SVR4/EABI -fPIC SVR4 PIC translation unit
15948 SVR4/EABI -mrelocatable EABI TOC function
15949 SVR4/EABI -maix AIX TOC object file
15950 SVR4/EABI -maix -mminimal-toc
15951 AIX minimal TOC translation unit
15953 Name Reg. Set by entries contains:
15954 made by addrs? fp? sum?
15956 AIX TOC 2 crt0 as Y option option
15957 AIX minimal TOC 30 prolog gcc Y Y option
15958 SVR4 SDATA 13 crt0 gcc N Y N
15959 SVR4 pic 30 prolog ld Y not yet N
15960 SVR4 PIC 30 prolog gcc Y option option
15961 EABI TOC 30 prolog gcc Y option option
/* Hash functions for the hash table.  */

/* Compute a hash value for constant K (an rtx), for use as the key hash
   in the TOC hash table.  The hash mixes the rtx code, machine mode and
   every operand (recursing into sub-rtxes), so structurally equal
   constants hash equally.  The multipliers 613 and 1231 are just
   mixing constants.  */

static unsigned
rs6000_hash_constant (rtx k)
{
  enum rtx_code code = GET_CODE (k);
  machine_mode mode = GET_MODE (k);
  unsigned result = (code << 3) ^ mode;
  const char *format;
  int flen, fidx;

  format = GET_RTX_FORMAT (code);
  flen = strlen (format);
  fidx = 0;

  /* A few codes get special treatment before the generic
     format-driven walk below.  */
  switch (code)
    {
    case LABEL_REF:
      /* Hash the UID of the referenced label rather than its operands.  */
      return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));

    case CONST_WIDE_INT:
      {
	int i;
	flen = CONST_WIDE_INT_NUNITS (k);
	for (i = 0; i < flen; i++)
	  result = result * 613 + CONST_WIDE_INT_ELT (k, i);
	return result;
      }

    case CONST_DOUBLE:
      return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;

    case CODE_LABEL:
      /* Skip the first three operands of a CODE_LABEL; only the
	 remaining ones contribute to the hash.  */
      fidx = 3;
      break;

    default:
      break;
    }

  /* Generic walk over the operands, dispatching on the rtx format
     character for each one.  */
  for (; fidx < flen; fidx++)
    switch (format[fidx])
      {
      case 's':
	{
	  unsigned i, len;
	  const char *str = XSTR (k, fidx);
	  len = strlen (str);
	  result = result * 613 + len;
	  for (i = 0; i < len; i++)
	    result = result * 613 + (unsigned) str[i];
	  break;
	}
      case 'u':
      case 'e':
	/* Sub-expression: recurse.  */
	result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
	break;
      case 'i':
      case 'n':
	result = result * 613 + (unsigned) XINT (k, fidx);
	break;
      case 'w':
	if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
	  result = result * 613 + (unsigned) XWINT (k, fidx);
	else
	  {
	    /* Fold a wide int in unsigned-sized chunks when the host
	       'unsigned' is narrower than HOST_WIDE_INT.  */
	    size_t i;
	    for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
	      result = result * 613 + (unsigned) (XWINT (k, fidx)
						  >> CHAR_BIT * i);
	  }
	break;
      case '0':
	break;
      default:
	gcc_unreachable ();
      }

  return result;
}
16046 hashval_t
16047 toc_hasher::hash (toc_hash_struct *thc)
16049 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
16052 /* Compare H1 and H2 for equivalence. */
16054 bool
16055 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
16057 rtx r1 = h1->key;
16058 rtx r2 = h2->key;
16060 if (h1->key_mode != h2->key_mode)
16061 return 0;
16063 return rtx_equal_p (r1, r2);
/* These are the names given by the C++ front-end to vtables, and
   vtable-like objects.  Ideally, this logic should not be here;
   instead, there should be some programmatic way of inquiring as
   to whether or not an object is a vtable.

   NAME is evaluated several times, so it must have no side effects.
   (Historically this macro ignored its parameter and silently
   referenced a local variable `name' at the expansion site; it now
   uses its argument like a proper macro.)  */

#define VTABLE_NAME_P(NAME) \
  (strncmp ("_vt.", (NAME), strlen ("_vt.")) == 0	  \
   || strncmp ("_ZTV", (NAME), strlen ("_ZTV")) == 0	  \
   || strncmp ("_ZTT", (NAME), strlen ("_ZTT")) == 0	  \
   || strncmp ("_ZTI", (NAME), strlen ("_ZTI")) == 0	  \
   || strncmp ("_ZTC", (NAME), strlen ("_ZTC")) == 0)
#ifdef NO_DOLLAR_IN_LABEL
/* Return a GGC-allocated character string translating dollar signs in
   input NAME to underscores.  Used by XCOFF ASM_OUTPUT_LABELREF.
   NAME is returned unchanged when it contains no '$', or when the '$'
   is the very first character.  */

const char *
rs6000_xcoff_strip_dollar (const char *name)
{
  const char *first_dollar = strchr (name, '$');

  if (first_dollar == NULL || first_dollar == name)
    return name;

  size_t len = strlen (name);
  char *copy = XALLOCAVEC (char, len + 1);
  memcpy (copy, name, len + 1);

  /* Walk the copy, turning every '$' from the first one onward
     into '_'.  */
  for (char *p = copy + (first_dollar - name);
       p != NULL;
       p = strchr (p + 1, '$'))
    *p = '_';

  return ggc_alloc_string (copy, len);
}
#endif
16108 void
16109 rs6000_output_symbol_ref (FILE *file, rtx x)
16111 const char *name = XSTR (x, 0);
16113 /* Currently C++ toc references to vtables can be emitted before it
16114 is decided whether the vtable is public or private. If this is
16115 the case, then the linker will eventually complain that there is
16116 a reference to an unknown section. Thus, for vtables only,
16117 we emit the TOC reference to reference the identifier and not the
16118 symbol. */
16119 if (VTABLE_NAME_P (name))
16121 RS6000_OUTPUT_BASENAME (file, name);
16123 else
16124 assemble_name (file, name);
/* Output a TOC entry.  We derive the entry name from what is being
   written.  X is the constant (or symbol/label) to output, LABELNO the
   internal LC label number for this entry, and MODE the mode it is
   being output in.  */

void
output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
{
  char buf[256];
  const char *name = buf;
  rtx base = x;
  HOST_WIDE_INT offset = 0;

  gcc_assert (!TARGET_NO_TOC_OR_PCREL);

  /* When the linker won't eliminate them, don't output duplicate
     TOC entries (this happens on AIX if there is any kind of TOC,
     and on SVR4 under -fPIC or -mrelocatable).  Don't do this for
     CODE_LABELs.  */
  if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
    {
      struct toc_hash_struct *h;

      /* Create toc_hash_table.  This can't be done at TARGET_OPTION_OVERRIDE
	 time because GGC is not initialized at that point.  */
      if (toc_hash_table == NULL)
	toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);

      h = ggc_alloc<toc_hash_struct> ();
      h->key = x;
      h->key_mode = mode;
      h->labelno = labelno;

      toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
      if (*found == NULL)
	*found = h;
      else  /* This is indeed a duplicate.
	       Set this label equal to that label.  */
	{
	  fputs ("\t.set ", file);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
	  fprintf (file, "%d,", labelno);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
	  fprintf (file, "%d\n", ((*found)->labelno));

#ifdef HAVE_AS_TLS
	  /* TLS GD/LD entries also have a companion LCM label that
	     must be aliased the same way.  */
	  if (TARGET_XCOFF && SYMBOL_REF_P (x)
	      && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
		  || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
	    {
	      fputs ("\t.set ", file);
	      ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
	      fprintf (file, "%d,", labelno);
	      ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
	      fprintf (file, "%d\n", ((*found)->labelno));
	    }
#endif
	  return;
	}
    }

  /* If we're going to put a double constant in the TOC, make sure it's
     aligned properly when strict alignment is on.  */
  if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
      && STRICT_ALIGNMENT
      && GET_MODE_BITSIZE (mode) >= 64
      && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
    ASM_OUTPUT_ALIGN (file, 3);
  }

  (*targetm.asm_out.internal_label) (file, "LC", labelno);

  /* Handle FP constants specially.  Note that if we have a minimal
     TOC, things we put here aren't actually in the TOC, so we can allow
     FP constants.  */
  if (CONST_DOUBLE_P (x)
      && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
	  || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
    {
      /* 128-bit float: four 32-bit words.  */
      long k[4];

      if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
	REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
      else
	REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff,
		     k[2] & 0xffffffff, k[3] & 0xffffffff);
	  fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
		   k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
	  return;
	}
      else
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs ("\t.long ", file);
	  else
	    fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff,
		     k[2] & 0xffffffff, k[3] & 0xffffffff);
	  fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
		   k[0] & 0xffffffff, k[1] & 0xffffffff,
		   k[2] & 0xffffffff, k[3] & 0xffffffff);
	  return;
	}
    }
  else if (CONST_DOUBLE_P (x)
	   && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
    {
      /* 64-bit float: two 32-bit words.  */
      long k[2];

      if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
	REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
      else
	REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc FD_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff);
	  fprintf (file, "0x%lx%08lx\n",
		   k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
		   k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
	  return;
	}
      else
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs ("\t.long ", file);
	  else
	    fprintf (file, "\t.tc FD_%lx_%lx[TC],",
		     k[0] & 0xffffffff, k[1] & 0xffffffff);
	  fprintf (file, "0x%lx,0x%lx\n",
		   k[0] & 0xffffffff, k[1] & 0xffffffff);
	  return;
	}
    }
  else if (CONST_DOUBLE_P (x)
	   && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
    {
      /* 32-bit float: a single word.  */
      long l;

      if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
	REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
      else
	REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
	  /* A Pmode-sized TOC slot; pad on the right for big-endian.  */
	  if (WORDS_BIG_ENDIAN)
	    fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
	  else
	    fprintf (file, "0x%lx\n", l & 0xffffffff);
	  return;
	}
      else
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs ("\t.long ", file);
	  else
	    fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
	  fprintf (file, "0x%lx\n", l & 0xffffffff);
	  return;
	}
    }
  else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
    {
      unsigned HOST_WIDE_INT low;
      HOST_WIDE_INT high;

      low = INTVAL (x) & 0xffffffff;
      high = (HOST_WIDE_INT) INTVAL (x) >> 32;

      /* TOC entries are always Pmode-sized, so when big-endian
	 smaller integer constants in the TOC need to be padded.
	 (This is still a win over putting the constants in
	 a separate constant pool, because then we'd have
	 to have both a TOC entry _and_ the actual constant.)

	 For a 32-bit target, CONST_INT values are loaded and shifted
	 entirely within `low' and can be stored in one TOC entry.  */

      /* It would be easy to make this work, but it doesn't now.  */
      gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));

      if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
	{
	  /* Shift the value into the high end of the Pmode slot.  */
	  low |= high << 32;
	  low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
	  high = (HOST_WIDE_INT) low >> 32;
	  low &= 0xffffffff;
	}

      if (TARGET_64BIT)
	{
	  if (TARGET_ELF || TARGET_MINIMAL_TOC)
	    fputs (DOUBLE_INT_ASM_OP, file);
	  else
	    fprintf (file, "\t.tc ID_%lx_%lx[TC],",
		     (long) high & 0xffffffff, (long) low & 0xffffffff);
	  fprintf (file, "0x%lx%08lx\n",
		   (long) high & 0xffffffff, (long) low & 0xffffffff);
	  return;
	}
      else
	{
	  if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
	    {
	      /* Two TOC words needed (e.g. DImode on -m32).  */
	      if (TARGET_ELF || TARGET_MINIMAL_TOC)
		fputs ("\t.long ", file);
	      else
		fprintf (file, "\t.tc ID_%lx_%lx[TC],",
			 (long) high & 0xffffffff, (long) low & 0xffffffff);
	      fprintf (file, "0x%lx,0x%lx\n",
		       (long) high & 0xffffffff, (long) low & 0xffffffff);
	    }
	  else
	    {
	      if (TARGET_ELF || TARGET_MINIMAL_TOC)
		fputs ("\t.long ", file);
	      else
		fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
	      fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
	    }
	  return;
	}
    }

  /* Not a recognized constant: it must be a symbol or label, possibly
     plus a constant offset wrapped in a CONST.  */
  if (GET_CODE (x) == CONST)
    {
      gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (x, 0), 1)));

      base = XEXP (XEXP (x, 0), 0);
      offset = INTVAL (XEXP (XEXP (x, 0), 1));
    }

  switch (GET_CODE (base))
    {
    case SYMBOL_REF:
      name = XSTR (base, 0);
      break;

    case LABEL_REF:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L",
				   CODE_LABEL_NUMBER (XEXP (base, 0)));
      break;

    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
      break;

    default:
      gcc_unreachable ();
    }

  if (TARGET_ELF || TARGET_MINIMAL_TOC)
    fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
  else
    {
      fputs ("\t.tc ", file);
      RS6000_OUTPUT_BASENAME (file, name);

      /* Encode the offset in the .tc entry name (.N for negative,
	 .P for positive) so distinct offsets get distinct entries.  */
      if (offset < 0)
	fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
      else if (offset)
	fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);

      /* Mark large TOC symbols on AIX with [TE] so they are mapped
	 after other TOC symbols, reducing overflow of small TOC access
	 to [TC] symbols.  */
      fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
	     ? "[TE]," : "[TC],", file);
    }

  /* Currently C++ toc references to vtables can be emitted before it
     is decided whether the vtable is public or private.  If this is
     the case, then the linker will eventually complain that there is
     a TOC reference to an unknown section.  Thus, for vtables only,
     we emit the TOC reference to reference the symbol and not the
     section.  */
  if (VTABLE_NAME_P (name))
    {
      RS6000_OUTPUT_BASENAME (file, name);
      if (offset < 0)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
      else if (offset > 0)
	fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
    }
  else
    output_addr_const (file, x);

#if HAVE_AS_TLS
  if (TARGET_XCOFF && SYMBOL_REF_P (base))
    {
      /* Append the TLS relocation modifier, and for GD/LD emit the
	 companion LCM module-handle entry.  */
      switch (SYMBOL_REF_TLS_MODEL (base))
	{
	case 0:
	  break;
	case TLS_MODEL_LOCAL_EXEC:
	  fputs ("@le", file);
	  break;
	case TLS_MODEL_INITIAL_EXEC:
	  fputs ("@ie", file);
	  break;
	/* Use global-dynamic for local-dynamic.  */
	case TLS_MODEL_GLOBAL_DYNAMIC:
	case TLS_MODEL_LOCAL_DYNAMIC:
	  putc ('\n', file);
	  (*targetm.asm_out.internal_label) (file, "LCM", labelno);
	  fputs ("\t.tc .", file);
	  RS6000_OUTPUT_BASENAME (file, name);
	  fputs ("[TC],", file);
	  output_addr_const (file, x);
	  fputs ("@m", file);
	  break;
	default:
	  gcc_unreachable ();
	}
    }
#endif

  putc ('\n', file);
}
/* Output an assembler pseudo-op to write an ASCII string of N characters
   starting at P to FILE.

   On the RS/6000, we have to do this using the .byte operation and
   write out special characters outside the quoted string.
   Also, the assembler is broken; very long strings are truncated,
   so we must artificially break them up early.  */

void
output_ascii (FILE *file, const char *p, int n)
{
  /* Text to emit before the next printable character (opens a quoted
     string), before the next non-printable character (closes any open
     string and starts a decimal .byte), and at the very end (closes
     whatever is pending).  */
  const char *open_quote = "\t.byte \"";
  const char *before_byte = "\t.byte ";
  const char *close_pending = NULL;
  int quoted_len = 0;
  int i;

  for (i = 0; i < n; i++)
    {
      char c = *p++;

      if (c >= ' ' && c < 0177)
	{
	  /* Printable character: make sure a quoted string is open,
	     then emit it, doubling '"' to escape it.  */
	  if (open_quote)
	    fputs (open_quote, file);
	  putc (c, file);

	  if (c == '"')
	    {
	      putc (c, file);
	      ++quoted_len;
	    }

	  open_quote = NULL;
	  before_byte = "\"\n\t.byte ";
	  close_pending = "\"\n";
	  ++quoted_len;

	  /* The assembler truncates very long strings, so break the
	     current one up well before that can happen.  */
	  if (quoted_len >= 512)
	    {
	      fputs (close_pending, file);

	      open_quote = "\t.byte \"";
	      before_byte = "\t.byte ";
	      close_pending = NULL;
	      quoted_len = 0;
	    }
	}
      else
	{
	  /* Non-printable character: emitted as a decimal .byte.  */
	  if (before_byte)
	    fputs (before_byte, file);
	  fprintf (file, "%d", c);

	  open_quote = "\n\t.byte \"";
	  before_byte = ", ";
	  close_pending = "\n";
	  quoted_len = 0;
	}
    }

  /* Now close the string if we have written one.  Then end the line.  */
  if (close_pending)
    fputs (close_pending, file);
}
16533 /* Generate a unique section name for FILENAME for a section type
16534 represented by SECTION_DESC. Output goes into BUF.
16536 SECTION_DESC can be any string, as long as it is different for each
16537 possible section type.
16539 We name the section in the same manner as xlc. The name begins with an
16540 underscore followed by the filename (after stripping any leading directory
16541 names) with the last period replaced by the string SECTION_DESC. If
16542 FILENAME does not contain a period, SECTION_DESC is appended to the end of
16543 the name. */
16545 void
16546 rs6000_gen_section_name (char **buf, const char *filename,
16547 const char *section_desc)
16549 const char *q, *after_last_slash, *last_period = 0;
16550 char *p;
16551 int len;
16553 after_last_slash = filename;
16554 for (q = filename; *q; q++)
16556 if (*q == '/')
16557 after_last_slash = q + 1;
16558 else if (*q == '.')
16559 last_period = q;
16562 len = strlen (after_last_slash) + strlen (section_desc) + 2;
16563 *buf = (char *) xmalloc (len);
16565 p = *buf;
16566 *p++ = '_';
16568 for (q = after_last_slash; *q; q++)
16570 if (q == last_period)
16572 strcpy (p, section_desc);
16573 p += strlen (section_desc);
16574 break;
16577 else if (ISALNUM (*q))
16578 *p++ = *q;
16581 if (last_period == 0)
16582 strcpy (p, section_desc);
16583 else
16584 *p = '\0';
/* Emit profile function.  LABELNO numbers the per-call-site profile
   counter label; it is only used on the AIX/ELFv2 counter path.  */

void
output_profile_hook (int labelno ATTRIBUTE_UNUSED)
{
  /* Non-standard profiling for kernels, which just saves LR then calls
     _mcount without worrying about arg saves.  The idea is to change
     the function prologue as little as possible as it isn't easy to
     account for arg save/restore code added just for _mcount.  */
  if (TARGET_PROFILE_KERNEL)
    return;

  if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    {
#ifndef NO_PROFILE_COUNTERS
# define NO_PROFILE_COUNTERS 0
#endif
      if (NO_PROFILE_COUNTERS)
	/* No counter: just call _mcount with no argument.  */
	emit_library_call (init_one_libfunc (RS6000_MCOUNT),
			   LCT_NORMAL, VOIDmode);
      else
	{
	  /* Pass the address of the LP counter label to _mcount.  */
	  char buf[30];
	  const char *label_name;
	  rtx fun;

	  ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
	  label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
	  fun = gen_rtx_SYMBOL_REF (Pmode, label_name);

	  emit_library_call (init_one_libfunc (RS6000_MCOUNT),
			     LCT_NORMAL, VOIDmode, fun, Pmode);
	}
    }
  else if (DEFAULT_ABI == ABI_DARWIN)
    {
      const char *mcount_name = RS6000_MCOUNT;
      int caller_addr_regno = LR_REGNO;

      /* Be conservative and always set this, at least for now.  */
      crtl->uses_pic_offset_table = 1;

#if TARGET_MACHO
      /* For PIC code, set up a stub and collect the caller's address
	 from r0, which is where the prologue puts it.  */
      if (MACHOPIC_INDIRECT
	  && crtl->uses_pic_offset_table)
	caller_addr_regno = 0;
#endif
      /* _mcount receives the caller's return address as argument.  */
      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
			 LCT_NORMAL, VOIDmode,
			 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
    }
}
/* Write function profiler code.  For the SVR4 (V4) ABI this emits the
   inline prologue sequence that saves LR, materializes the address of
   the per-function LP label, and calls _mcount; the exact sequence
   depends on the PIC model in effect.  The AIX/ELFv2/Darwin ABIs do
   this in output_profile_hook instead.  */

void
output_function_profiler (FILE *file, int labelno)
{
  char buf[100];

  switch (DEFAULT_ABI)
    {
    default:
      gcc_unreachable ();

    case ABI_V4:
      if (!TARGET_32BIT)
	{
	  warning (0, "no profiling of 64-bit code for this ABI");
	  return;
	}
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
      fprintf (file, "\tmflr %s\n", reg_names[0]);
      if (NO_PROFILE_COUNTERS)
	{
	  /* Just save LR in the caller's frame; no counter address
	     needs to be computed.  */
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	}
      else if (TARGET_SECURE_PLT && flag_pic)
	{
	  /* Secure-PLT PIC: compute the LP label address relative to a
	     bcl-established anchor (or the 476 link-stack thunk).  */
	  if (TARGET_LINK_STACK)
	    {
	      char name[32];
	      get_ppc476_thunk_name (name);
	      asm_fprintf (file, "\tbl %s\n", name);
	    }
	  else
	    asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
	  asm_fprintf (file, "\taddis %s,%s,",
		       reg_names[12], reg_names[12]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
	}
      else if (flag_pic == 1)
	{
	  /* Small-model PIC: load the LP label address from the GOT.  */
	  fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
	  asm_fprintf (file, "\tlwz %s,", reg_names[0]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "@got(%s)\n", reg_names[12]);
	}
      else if (flag_pic > 1)
	{
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  /* Now, we need to get the address of the label.  */
	  if (TARGET_LINK_STACK)
	    {
	      char name[32];
	      get_ppc476_thunk_name (name);
	      asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
	      assemble_name (file, buf);
	      fputs ("-.\n1:", file);
	      asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
	      asm_fprintf (file, "\taddi %s,%s,4\n",
			   reg_names[11], reg_names[11]);
	    }
	  else
	    {
	      fputs ("\tbcl 20,31,1f\n\t.long ", file);
	      assemble_name (file, buf);
	      fputs ("-.\n1:", file);
	      asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
	    }
	  /* Add the PC-relative displacement to the anchor address.  */
	  asm_fprintf (file, "\tlwz %s,0(%s)\n",
		       reg_names[0], reg_names[11]);
	  asm_fprintf (file, "\tadd %s,%s,%s\n",
		       reg_names[0], reg_names[0], reg_names[11]);
	}
      else
	{
	  /* Non-PIC: materialize the LP label address directly.  */
	  asm_fprintf (file, "\tlis %s,", reg_names[12]);
	  assemble_name (file, buf);
	  fputs ("@ha\n", file);
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  asm_fprintf (file, "\tla %s,", reg_names[0]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "@l(%s)\n", reg_names[12]);
	}

      /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH.  */
      fprintf (file, "\tbl %s%s\n",
	       RS6000_MCOUNT, flag_pic ? "@plt" : "");
      break;

    case ABI_AIX:
    case ABI_ELFv2:
    case ABI_DARWIN:
      /* Don't do anything, done in output_profile_hook ().  */
      break;
    }
}
/* The last insn issued by the scheduler.  */
static rtx_insn *last_scheduled_insn;

/* Helps balance the issuing of load and store instructions.  */
static int load_store_pendulum;

/* Helps pair divide insns during scheduling.  */
static int divide_cnt;
/* Helps pair and alternate vector and vector-load insns during
   scheduling.  */
static int vec_pairing;
16768 /* Power4 load update and store update instructions are cracked into a
16769 load or store and an integer insn which are executed in the same cycle.
16770 Branches have their own dispatch slot which does not count against the
16771 GCC issue rate, but it changes the program flow so there are no other
16772 instructions to issue in this cycle. */
16774 static int
16775 rs6000_variable_issue_1 (rtx_insn *insn, int more)
16777 last_scheduled_insn = insn;
16778 if (GET_CODE (PATTERN (insn)) == USE
16779 || GET_CODE (PATTERN (insn)) == CLOBBER)
16781 cached_can_issue_more = more;
16782 return cached_can_issue_more;
16785 if (insn_terminates_group_p (insn, current_group))
16787 cached_can_issue_more = 0;
16788 return cached_can_issue_more;
16791 /* If no reservation, but reach here */
16792 if (recog_memoized (insn) < 0)
16793 return more;
16795 if (rs6000_sched_groups)
16797 if (is_microcoded_insn (insn))
16798 cached_can_issue_more = 0;
16799 else if (is_cracked_insn (insn))
16800 cached_can_issue_more = more > 2 ? more - 2 : 0;
16801 else
16802 cached_can_issue_more = more - 1;
16804 return cached_can_issue_more;
16807 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
16808 return 0;
16810 cached_can_issue_more = more - 1;
16811 return cached_can_issue_more;
16814 static int
16815 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
16817 int r = rs6000_variable_issue_1 (insn, more);
16818 if (verbose)
16819 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
16820 return r;
/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.
   DEP_TYPE is the dependency kind (true/output/anti); the unnamed
   unsigned parameter is unused.  */

static int
rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
		    unsigned int)
{
  enum attr_type attr_type;

  /* Leave the cost alone for insns we cannot recognize.  */
  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  switch (dep_type)
    {
    case REG_DEP_TRUE:
      {
	/* Data dependency; DEP_INSN writes a register that INSN reads
	   some cycles later.  */

	/* Separate a load from a narrower, dependent store.  */
	if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
	     || rs6000_tune == PROCESSOR_FUTURE)
	    && GET_CODE (PATTERN (insn)) == SET
	    && GET_CODE (PATTERN (dep_insn)) == SET
	    && MEM_P (XEXP (PATTERN (insn), 1))
	    && MEM_P (XEXP (PATTERN (dep_insn), 0))
	    && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
		> GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
	  return cost + 14;

	attr_type = get_attr_type (insn);

	switch (attr_type)
	  {
	  case TYPE_JMPREG:
	    /* Tell the first scheduling pass about the latency between
	       a mtctr and bctr (and mtlr and br/blr).  The first
	       scheduling pass will not know about this latency since
	       the mtctr instruction, which has the latency associated
	       to it, will be generated by reload.  */
	    return 4;
	  case TYPE_BRANCH:
	    /* Leave some extra cycles between a compare and its
	       dependent branch, to inhibit expensive mispredicts.  */
	    if ((rs6000_tune == PROCESSOR_PPC603
		 || rs6000_tune == PROCESSOR_PPC604
		 || rs6000_tune == PROCESSOR_PPC604e
		 || rs6000_tune == PROCESSOR_PPC620
		 || rs6000_tune == PROCESSOR_PPC630
		 || rs6000_tune == PROCESSOR_PPC750
		 || rs6000_tune == PROCESSOR_PPC7400
		 || rs6000_tune == PROCESSOR_PPC7450
		 || rs6000_tune == PROCESSOR_PPCE5500
		 || rs6000_tune == PROCESSOR_PPCE6500
		 || rs6000_tune == PROCESSOR_POWER4
		 || rs6000_tune == PROCESSOR_POWER5
		 || rs6000_tune == PROCESSOR_POWER7
		 || rs6000_tune == PROCESSOR_POWER8
		 || rs6000_tune == PROCESSOR_POWER9
		 || rs6000_tune == PROCESSOR_FUTURE
		 || rs6000_tune == PROCESSOR_CELL)
		&& recog_memoized (dep_insn)
		&& (INSN_CODE (dep_insn) >= 0))

	      switch (get_attr_type (dep_insn))
		{
		/* Compares (including record-form arithmetic) feeding a
		   branch pay two extra cycles.  */
		case TYPE_CMP:
		case TYPE_FPCOMPARE:
		case TYPE_CR_LOGICAL:
		  return cost + 2;
		case TYPE_EXTS:
		case TYPE_MUL:
		  if (get_attr_dot (dep_insn) == DOT_YES)
		    return cost + 2;
		  else
		    break;
		case TYPE_SHIFT:
		  if (get_attr_dot (dep_insn) == DOT_YES
		      && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
		    return cost + 2;
		  else
		    break;
		default:
		  break;
		}
	    break;

	  case TYPE_STORE:
	  case TYPE_FPSTORE:
	    if ((rs6000_tune == PROCESSOR_POWER6)
		&& recog_memoized (dep_insn)
		&& (INSN_CODE (dep_insn) >= 0))
	      {

		if (GET_CODE (PATTERN (insn)) != SET)
		  /* If this happens, we have to extend this to schedule
		     optimally.  Return default for now.  */
		  return cost;

		/* Adjust the cost for the case where the value written
		   by a fixed point operation is used as the address
		   gen value on a store.  */
		switch (get_attr_type (dep_insn))
		  {
		  case TYPE_LOAD:
		  case TYPE_CNTLZ:
		    {
		      if (! rs6000_store_data_bypass_p (dep_insn, insn))
			return get_attr_sign_extend (dep_insn)
			       == SIGN_EXTEND_YES ? 6 : 4;
		      break;
		    }
		  case TYPE_SHIFT:
		    {
		      if (! rs6000_store_data_bypass_p (dep_insn, insn))
			return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
			       6 : 3;
		      break;
		    }
		  case TYPE_INTEGER:
		  case TYPE_ADD:
		  case TYPE_LOGICAL:
		  case TYPE_EXTS:
		  case TYPE_INSERT:
		    {
		      if (! rs6000_store_data_bypass_p (dep_insn, insn))
			return 3;
		      break;
		    }
		  case TYPE_STORE:
		  case TYPE_FPLOAD:
		  case TYPE_FPSTORE:
		    {
		      if (get_attr_update (dep_insn) == UPDATE_YES
			  && ! rs6000_store_data_bypass_p (dep_insn, insn))
			return 3;
		      break;
		    }
		  case TYPE_MUL:
		    {
		      if (! rs6000_store_data_bypass_p (dep_insn, insn))
			return 17;
		      break;
		    }
		  case TYPE_DIV:
		    {
		      if (! rs6000_store_data_bypass_p (dep_insn, insn))
			return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
		      break;
		    }
		  default:
		    break;
		  }
	      }
	    break;

	  case TYPE_LOAD:
	    if ((rs6000_tune == PROCESSOR_POWER6)
		&& recog_memoized (dep_insn)
		&& (INSN_CODE (dep_insn) >= 0))
	      {

		/* Adjust the cost for the case where the value written
		   by a fixed point instruction is used within the address
		   gen portion of a subsequent load(u)(x) */
		switch (get_attr_type (dep_insn))
		  {
		  case TYPE_LOAD:
		  case TYPE_CNTLZ:
		    {
		      if (set_to_load_agen (dep_insn, insn))
			return get_attr_sign_extend (dep_insn)
			       == SIGN_EXTEND_YES ? 6 : 4;
		      break;
		    }
		  case TYPE_SHIFT:
		    {
		      if (set_to_load_agen (dep_insn, insn))
			return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
			       6 : 3;
		      break;
		    }
		  case TYPE_INTEGER:
		  case TYPE_ADD:
		  case TYPE_LOGICAL:
		  case TYPE_EXTS:
		  case TYPE_INSERT:
		    {
		      if (set_to_load_agen (dep_insn, insn))
			return 3;
		      break;
		    }
		  case TYPE_STORE:
		  case TYPE_FPLOAD:
		  case TYPE_FPSTORE:
		    {
		      if (get_attr_update (dep_insn) == UPDATE_YES
			  && set_to_load_agen (dep_insn, insn))
			return 3;
		      break;
		    }
		  case TYPE_MUL:
		    {
		      if (set_to_load_agen (dep_insn, insn))
			return 17;
		      break;
		    }
		  case TYPE_DIV:
		    {
		      if (set_to_load_agen (dep_insn, insn))
			return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
		      break;
		    }
		  default:
		    break;
		  }
	      }
	    break;

	  case TYPE_FPLOAD:
	    if ((rs6000_tune == PROCESSOR_POWER6)
		&& get_attr_update (insn) == UPDATE_NO
		&& recog_memoized (dep_insn)
		&& (INSN_CODE (dep_insn) >= 0)
		&& (get_attr_type (dep_insn) == TYPE_MFFGPR))
	      return 2;

	  default:
	    break;
	  }

	/* Fall out to return default cost.  */
      }
      break;

    case REG_DEP_OUTPUT:
      /* Output dependency; DEP_INSN writes a register that INSN writes some
	 cycles later.  */
      if ((rs6000_tune == PROCESSOR_POWER6)
	  && recog_memoized (dep_insn)
	  && (INSN_CODE (dep_insn) >= 0))
	{
	  attr_type = get_attr_type (insn);

	  switch (attr_type)
	    {
	    case TYPE_FP:
	    case TYPE_FPSIMPLE:
	      if (get_attr_type (dep_insn) == TYPE_FP
		  || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
		return 1;
	      break;
	    case TYPE_FPLOAD:
	      if (get_attr_update (insn) == UPDATE_NO
		  && get_attr_type (dep_insn) == TYPE_MFFGPR)
		return 2;
	      break;
	    default:
	      break;
	    }
	}
      /* Fall through, no cost for output dependency.  */
      /* FALLTHRU */

    case REG_DEP_ANTI:
      /* Anti dependency; DEP_INSN reads a register that INSN writes
	 some cycles later.  */
      return 0;

    default:
      gcc_unreachable ();
    }

  return cost;
}
17099 /* Debug version of rs6000_adjust_cost. */
17101 static int
17102 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
17103 int cost, unsigned int dw)
17105 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
17107 if (ret != cost)
17109 const char *dep;
17111 switch (dep_type)
17113 default: dep = "unknown depencency"; break;
17114 case REG_DEP_TRUE: dep = "data dependency"; break;
17115 case REG_DEP_OUTPUT: dep = "output dependency"; break;
17116 case REG_DEP_ANTI: dep = "anti depencency"; break;
17119 fprintf (stderr,
17120 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
17121 "%s, insn:\n", ret, cost, dep);
17123 debug_rtx (insn);
17126 return ret;
17129 /* The function returns a true if INSN is microcoded.
17130 Return false otherwise. */
17132 static bool
17133 is_microcoded_insn (rtx_insn *insn)
17135 if (!insn || !NONDEBUG_INSN_P (insn)
17136 || GET_CODE (PATTERN (insn)) == USE
17137 || GET_CODE (PATTERN (insn)) == CLOBBER)
17138 return false;
17140 if (rs6000_tune == PROCESSOR_CELL)
17141 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
17143 if (rs6000_sched_groups
17144 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17146 enum attr_type type = get_attr_type (insn);
17147 if ((type == TYPE_LOAD
17148 && get_attr_update (insn) == UPDATE_YES
17149 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
17150 || ((type == TYPE_LOAD || type == TYPE_STORE)
17151 && get_attr_update (insn) == UPDATE_YES
17152 && get_attr_indexed (insn) == INDEXED_YES)
17153 || type == TYPE_MFCR)
17154 return true;
17157 return false;
17160 /* The function returns true if INSN is cracked into 2 instructions
17161 by the processor (and therefore occupies 2 issue slots). */
17163 static bool
17164 is_cracked_insn (rtx_insn *insn)
17166 if (!insn || !NONDEBUG_INSN_P (insn)
17167 || GET_CODE (PATTERN (insn)) == USE
17168 || GET_CODE (PATTERN (insn)) == CLOBBER)
17169 return false;
17171 if (rs6000_sched_groups
17172 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17174 enum attr_type type = get_attr_type (insn);
17175 if ((type == TYPE_LOAD
17176 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
17177 && get_attr_update (insn) == UPDATE_NO)
17178 || (type == TYPE_LOAD
17179 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
17180 && get_attr_update (insn) == UPDATE_YES
17181 && get_attr_indexed (insn) == INDEXED_NO)
17182 || (type == TYPE_STORE
17183 && get_attr_update (insn) == UPDATE_YES
17184 && get_attr_indexed (insn) == INDEXED_NO)
17185 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
17186 && get_attr_update (insn) == UPDATE_YES)
17187 || (type == TYPE_CR_LOGICAL
17188 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
17189 || (type == TYPE_EXTS
17190 && get_attr_dot (insn) == DOT_YES)
17191 || (type == TYPE_SHIFT
17192 && get_attr_dot (insn) == DOT_YES
17193 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
17194 || (type == TYPE_MUL
17195 && get_attr_dot (insn) == DOT_YES)
17196 || type == TYPE_DIV
17197 || (type == TYPE_INSERT
17198 && get_attr_size (insn) == SIZE_32))
17199 return true;
17202 return false;
17205 /* The function returns true if INSN can be issued only from
17206 the branch slot. */
17208 static bool
17209 is_branch_slot_insn (rtx_insn *insn)
17211 if (!insn || !NONDEBUG_INSN_P (insn)
17212 || GET_CODE (PATTERN (insn)) == USE
17213 || GET_CODE (PATTERN (insn)) == CLOBBER)
17214 return false;
17216 if (rs6000_sched_groups)
17218 enum attr_type type = get_attr_type (insn);
17219 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
17220 return true;
17221 return false;
17224 return false;
17227 /* The function returns true if out_inst sets a value that is
17228 used in the address generation computation of in_insn */
17229 static bool
17230 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
17232 rtx out_set, in_set;
17234 /* For performance reasons, only handle the simple case where
17235 both loads are a single_set. */
17236 out_set = single_set (out_insn);
17237 if (out_set)
17239 in_set = single_set (in_insn);
17240 if (in_set)
17241 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
17244 return false;
17247 /* Try to determine base/offset/size parts of the given MEM.
17248 Return true if successful, false if all the values couldn't
17249 be determined.
17251 This function only looks for REG or REG+CONST address forms.
17252 REG+REG address form will return false. */
17254 static bool
17255 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
17256 HOST_WIDE_INT *size)
17258 rtx addr_rtx;
17259 if MEM_SIZE_KNOWN_P (mem)
17260 *size = MEM_SIZE (mem);
17261 else
17262 return false;
17264 addr_rtx = (XEXP (mem, 0));
17265 if (GET_CODE (addr_rtx) == PRE_MODIFY)
17266 addr_rtx = XEXP (addr_rtx, 1);
17268 *offset = 0;
17269 while (GET_CODE (addr_rtx) == PLUS
17270 && CONST_INT_P (XEXP (addr_rtx, 1)))
17272 *offset += INTVAL (XEXP (addr_rtx, 1));
17273 addr_rtx = XEXP (addr_rtx, 0);
17275 if (!REG_P (addr_rtx))
17276 return false;
17278 *base = addr_rtx;
17279 return true;
17282 /* The function returns true if the target storage location of
17283 mem1 is adjacent to the target storage location of mem2 */
17284 /* Return 1 if memory locations are adjacent. */
17286 static bool
17287 adjacent_mem_locations (rtx mem1, rtx mem2)
17289 rtx reg1, reg2;
17290 HOST_WIDE_INT off1, size1, off2, size2;
17292 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17293 && get_memref_parts (mem2, &reg2, &off2, &size2))
17294 return ((REGNO (reg1) == REGNO (reg2))
17295 && ((off1 + size1 == off2)
17296 || (off2 + size2 == off1)));
17298 return false;
17301 /* This function returns true if it can be determined that the two MEM
17302 locations overlap by at least 1 byte based on base reg/offset/size. */
17304 static bool
17305 mem_locations_overlap (rtx mem1, rtx mem2)
17307 rtx reg1, reg2;
17308 HOST_WIDE_INT off1, size1, off2, size2;
17310 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17311 && get_memref_parts (mem2, &reg2, &off2, &size2))
17312 return ((REGNO (reg1) == REGNO (reg2))
17313 && (((off1 <= off2) && (off1 + size1 > off2))
17314 || ((off2 <= off1) && (off2 + size2 > off1))));
17316 return false;
17319 /* A C statement (sans semicolon) to update the integer scheduling
17320 priority INSN_PRIORITY (INSN). Increase the priority to execute the
17321 INSN earlier, reduce the priority to execute INSN later. Do not
17322 define this macro if you do not need to adjust the scheduling
17323 priorities of insns. */
17325 static int
17326 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
17328 rtx load_mem, str_mem;
17329 /* On machines (like the 750) which have asymmetric integer units,
17330 where one integer unit can do multiply and divides and the other
17331 can't, reduce the priority of multiply/divide so it is scheduled
17332 before other integer operations. */
17334 #if 0
17335 if (! INSN_P (insn))
17336 return priority;
17338 if (GET_CODE (PATTERN (insn)) == USE)
17339 return priority;
17341 switch (rs6000_tune) {
17342 case PROCESSOR_PPC750:
17343 switch (get_attr_type (insn))
17345 default:
17346 break;
17348 case TYPE_MUL:
17349 case TYPE_DIV:
17350 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
17351 priority, priority);
17352 if (priority >= 0 && priority < 0x01000000)
17353 priority >>= 3;
17354 break;
17357 #endif
17359 if (insn_must_be_first_in_group (insn)
17360 && reload_completed
17361 && current_sched_info->sched_max_insns_priority
17362 && rs6000_sched_restricted_insns_priority)
17365 /* Prioritize insns that can be dispatched only in the first
17366 dispatch slot. */
17367 if (rs6000_sched_restricted_insns_priority == 1)
17368 /* Attach highest priority to insn. This means that in
17369 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
17370 precede 'priority' (critical path) considerations. */
17371 return current_sched_info->sched_max_insns_priority;
17372 else if (rs6000_sched_restricted_insns_priority == 2)
17373 /* Increase priority of insn by a minimal amount. This means that in
17374 haifa-sched.c:ready_sort(), only 'priority' (critical path)
17375 considerations precede dispatch-slot restriction considerations. */
17376 return (priority + 1);
17379 if (rs6000_tune == PROCESSOR_POWER6
17380 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
17381 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
17382 /* Attach highest priority to insn if the scheduler has just issued two
17383 stores and this instruction is a load, or two loads and this instruction
17384 is a store. Power6 wants loads and stores scheduled alternately
17385 when possible */
17386 return current_sched_info->sched_max_insns_priority;
17388 return priority;
17391 /* Return true if the instruction is nonpipelined on the Cell. */
17392 static bool
17393 is_nonpipeline_insn (rtx_insn *insn)
17395 enum attr_type type;
17396 if (!insn || !NONDEBUG_INSN_P (insn)
17397 || GET_CODE (PATTERN (insn)) == USE
17398 || GET_CODE (PATTERN (insn)) == CLOBBER)
17399 return false;
17401 type = get_attr_type (insn);
17402 if (type == TYPE_MUL
17403 || type == TYPE_DIV
17404 || type == TYPE_SDIV
17405 || type == TYPE_DDIV
17406 || type == TYPE_SSQRT
17407 || type == TYPE_DSQRT
17408 || type == TYPE_MFCR
17409 || type == TYPE_MFCRF
17410 || type == TYPE_MFJMPR)
17412 return true;
17414 return false;
17418 /* Return how many instructions the machine can issue per cycle. */
17420 static int
17421 rs6000_issue_rate (void)
17423 /* Unless scheduling for register pressure, use issue rate of 1 for
17424 first scheduling pass to decrease degradation. */
17425 if (!reload_completed && !flag_sched_pressure)
17426 return 1;
17428 switch (rs6000_tune) {
17429 case PROCESSOR_RS64A:
17430 case PROCESSOR_PPC601: /* ? */
17431 case PROCESSOR_PPC7450:
17432 return 3;
17433 case PROCESSOR_PPC440:
17434 case PROCESSOR_PPC603:
17435 case PROCESSOR_PPC750:
17436 case PROCESSOR_PPC7400:
17437 case PROCESSOR_PPC8540:
17438 case PROCESSOR_PPC8548:
17439 case PROCESSOR_CELL:
17440 case PROCESSOR_PPCE300C2:
17441 case PROCESSOR_PPCE300C3:
17442 case PROCESSOR_PPCE500MC:
17443 case PROCESSOR_PPCE500MC64:
17444 case PROCESSOR_PPCE5500:
17445 case PROCESSOR_PPCE6500:
17446 case PROCESSOR_TITAN:
17447 return 2;
17448 case PROCESSOR_PPC476:
17449 case PROCESSOR_PPC604:
17450 case PROCESSOR_PPC604e:
17451 case PROCESSOR_PPC620:
17452 case PROCESSOR_PPC630:
17453 return 4;
17454 case PROCESSOR_POWER4:
17455 case PROCESSOR_POWER5:
17456 case PROCESSOR_POWER6:
17457 case PROCESSOR_POWER7:
17458 return 5;
17459 case PROCESSOR_POWER8:
17460 return 7;
17461 case PROCESSOR_POWER9:
17462 case PROCESSOR_FUTURE:
17463 return 6;
17464 default:
17465 return 1;
17469 /* Return how many instructions to look ahead for better insn
17470 scheduling. */
17472 static int
17473 rs6000_use_sched_lookahead (void)
17475 switch (rs6000_tune)
17477 case PROCESSOR_PPC8540:
17478 case PROCESSOR_PPC8548:
17479 return 4;
17481 case PROCESSOR_CELL:
17482 return (reload_completed ? 8 : 0);
17484 default:
17485 return 0;
17489 /* We are choosing insn from the ready queue. Return zero if INSN can be
17490 chosen. */
17491 static int
17492 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
17494 if (ready_index == 0)
17495 return 0;
17497 if (rs6000_tune != PROCESSOR_CELL)
17498 return 0;
17500 gcc_assert (insn != NULL_RTX && INSN_P (insn));
17502 if (!reload_completed
17503 || is_nonpipeline_insn (insn)
17504 || is_microcoded_insn (insn))
17505 return 1;
17507 return 0;
17510 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
17511 and return true. */
17513 static bool
17514 find_mem_ref (rtx pat, rtx *mem_ref)
17516 const char * fmt;
17517 int i, j;
17519 /* stack_tie does not produce any real memory traffic. */
17520 if (tie_operand (pat, VOIDmode))
17521 return false;
17523 if (MEM_P (pat))
17525 *mem_ref = pat;
17526 return true;
17529 /* Recursively process the pattern. */
17530 fmt = GET_RTX_FORMAT (GET_CODE (pat));
17532 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
17534 if (fmt[i] == 'e')
17536 if (find_mem_ref (XEXP (pat, i), mem_ref))
17537 return true;
17539 else if (fmt[i] == 'E')
17540 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
17542 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
17543 return true;
17547 return false;
17550 /* Determine if PAT is a PATTERN of a load insn. */
17552 static bool
17553 is_load_insn1 (rtx pat, rtx *load_mem)
17555 if (!pat || pat == NULL_RTX)
17556 return false;
17558 if (GET_CODE (pat) == SET)
17559 return find_mem_ref (SET_SRC (pat), load_mem);
17561 if (GET_CODE (pat) == PARALLEL)
17563 int i;
17565 for (i = 0; i < XVECLEN (pat, 0); i++)
17566 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
17567 return true;
17570 return false;
17573 /* Determine if INSN loads from memory. */
17575 static bool
17576 is_load_insn (rtx insn, rtx *load_mem)
17578 if (!insn || !INSN_P (insn))
17579 return false;
17581 if (CALL_P (insn))
17582 return false;
17584 return is_load_insn1 (PATTERN (insn), load_mem);
17587 /* Determine if PAT is a PATTERN of a store insn. */
17589 static bool
17590 is_store_insn1 (rtx pat, rtx *str_mem)
17592 if (!pat || pat == NULL_RTX)
17593 return false;
17595 if (GET_CODE (pat) == SET)
17596 return find_mem_ref (SET_DEST (pat), str_mem);
17598 if (GET_CODE (pat) == PARALLEL)
17600 int i;
17602 for (i = 0; i < XVECLEN (pat, 0); i++)
17603 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
17604 return true;
17607 return false;
17610 /* Determine if INSN stores to memory. */
17612 static bool
17613 is_store_insn (rtx insn, rtx *str_mem)
17615 if (!insn || !INSN_P (insn))
17616 return false;
17618 return is_store_insn1 (PATTERN (insn), str_mem);
17621 /* Return whether TYPE is a Power9 pairable vector instruction type. */
17623 static bool
17624 is_power9_pairable_vec_type (enum attr_type type)
17626 switch (type)
17628 case TYPE_VECSIMPLE:
17629 case TYPE_VECCOMPLEX:
17630 case TYPE_VECDIV:
17631 case TYPE_VECCMP:
17632 case TYPE_VECPERM:
17633 case TYPE_VECFLOAT:
17634 case TYPE_VECFDIV:
17635 case TYPE_VECDOUBLE:
17636 return true;
17637 default:
17638 break;
17640 return false;
17643 /* Returns whether the dependence between INSN and NEXT is considered
17644 costly by the given target. */
17646 static bool
17647 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
17649 rtx insn;
17650 rtx next;
17651 rtx load_mem, str_mem;
17653 /* If the flag is not enabled - no dependence is considered costly;
17654 allow all dependent insns in the same group.
17655 This is the most aggressive option. */
17656 if (rs6000_sched_costly_dep == no_dep_costly)
17657 return false;
17659 /* If the flag is set to 1 - a dependence is always considered costly;
17660 do not allow dependent instructions in the same group.
17661 This is the most conservative option. */
17662 if (rs6000_sched_costly_dep == all_deps_costly)
17663 return true;
17665 insn = DEP_PRO (dep);
17666 next = DEP_CON (dep);
17668 if (rs6000_sched_costly_dep == store_to_load_dep_costly
17669 && is_load_insn (next, &load_mem)
17670 && is_store_insn (insn, &str_mem))
17671 /* Prevent load after store in the same group. */
17672 return true;
17674 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
17675 && is_load_insn (next, &load_mem)
17676 && is_store_insn (insn, &str_mem)
17677 && DEP_TYPE (dep) == REG_DEP_TRUE
17678 && mem_locations_overlap(str_mem, load_mem))
17679 /* Prevent load after store in the same group if it is a true
17680 dependence. */
17681 return true;
17683 /* The flag is set to X; dependences with latency >= X are considered costly,
17684 and will not be scheduled in the same group. */
17685 if (rs6000_sched_costly_dep <= max_dep_latency
17686 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
17687 return true;
17689 return false;
17692 /* Return the next insn after INSN that is found before TAIL is reached,
17693 skipping any "non-active" insns - insns that will not actually occupy
17694 an issue slot. Return NULL_RTX if such an insn is not found. */
17696 static rtx_insn *
17697 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
17699 if (insn == NULL_RTX || insn == tail)
17700 return NULL;
17702 while (1)
17704 insn = NEXT_INSN (insn);
17705 if (insn == NULL_RTX || insn == tail)
17706 return NULL;
17708 if (CALL_P (insn)
17709 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
17710 || (NONJUMP_INSN_P (insn)
17711 && GET_CODE (PATTERN (insn)) != USE
17712 && GET_CODE (PATTERN (insn)) != CLOBBER
17713 && INSN_CODE (insn) != CODE_FOR_stack_tie))
17714 break;
17716 return insn;
17719 /* Do Power9 specific sched_reorder2 reordering of ready list. */
17721 static int
17722 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
17724 int pos;
17725 int i;
17726 rtx_insn *tmp;
17727 enum attr_type type, type2;
17729 type = get_attr_type (last_scheduled_insn);
17731 /* Try to issue fixed point divides back-to-back in pairs so they will be
17732 routed to separate execution units and execute in parallel. */
17733 if (type == TYPE_DIV && divide_cnt == 0)
17735 /* First divide has been scheduled. */
17736 divide_cnt = 1;
17738 /* Scan the ready list looking for another divide, if found move it
17739 to the end of the list so it is chosen next. */
17740 pos = lastpos;
17741 while (pos >= 0)
17743 if (recog_memoized (ready[pos]) >= 0
17744 && get_attr_type (ready[pos]) == TYPE_DIV)
17746 tmp = ready[pos];
17747 for (i = pos; i < lastpos; i++)
17748 ready[i] = ready[i + 1];
17749 ready[lastpos] = tmp;
17750 break;
17752 pos--;
17755 else
17757 /* Last insn was the 2nd divide or not a divide, reset the counter. */
17758 divide_cnt = 0;
17760 /* The best dispatch throughput for vector and vector load insns can be
17761 achieved by interleaving a vector and vector load such that they'll
17762 dispatch to the same superslice. If this pairing cannot be achieved
17763 then it is best to pair vector insns together and vector load insns
17764 together.
17766 To aid in this pairing, vec_pairing maintains the current state with
17767 the following values:
17769 0 : Initial state, no vecload/vector pairing has been started.
17771 1 : A vecload or vector insn has been issued and a candidate for
17772 pairing has been found and moved to the end of the ready
17773 list. */
17774 if (type == TYPE_VECLOAD)
17776 /* Issued a vecload. */
17777 if (vec_pairing == 0)
17779 int vecload_pos = -1;
17780 /* We issued a single vecload, look for a vector insn to pair it
17781 with. If one isn't found, try to pair another vecload. */
17782 pos = lastpos;
17783 while (pos >= 0)
17785 if (recog_memoized (ready[pos]) >= 0)
17787 type2 = get_attr_type (ready[pos]);
17788 if (is_power9_pairable_vec_type (type2))
17790 /* Found a vector insn to pair with, move it to the
17791 end of the ready list so it is scheduled next. */
17792 tmp = ready[pos];
17793 for (i = pos; i < lastpos; i++)
17794 ready[i] = ready[i + 1];
17795 ready[lastpos] = tmp;
17796 vec_pairing = 1;
17797 return cached_can_issue_more;
17799 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
17800 /* Remember position of first vecload seen. */
17801 vecload_pos = pos;
17803 pos--;
17805 if (vecload_pos >= 0)
17807 /* Didn't find a vector to pair with but did find a vecload,
17808 move it to the end of the ready list. */
17809 tmp = ready[vecload_pos];
17810 for (i = vecload_pos; i < lastpos; i++)
17811 ready[i] = ready[i + 1];
17812 ready[lastpos] = tmp;
17813 vec_pairing = 1;
17814 return cached_can_issue_more;
17818 else if (is_power9_pairable_vec_type (type))
17820 /* Issued a vector operation. */
17821 if (vec_pairing == 0)
17823 int vec_pos = -1;
17824 /* We issued a single vector insn, look for a vecload to pair it
17825 with. If one isn't found, try to pair another vector. */
17826 pos = lastpos;
17827 while (pos >= 0)
17829 if (recog_memoized (ready[pos]) >= 0)
17831 type2 = get_attr_type (ready[pos]);
17832 if (type2 == TYPE_VECLOAD)
17834 /* Found a vecload insn to pair with, move it to the
17835 end of the ready list so it is scheduled next. */
17836 tmp = ready[pos];
17837 for (i = pos; i < lastpos; i++)
17838 ready[i] = ready[i + 1];
17839 ready[lastpos] = tmp;
17840 vec_pairing = 1;
17841 return cached_can_issue_more;
17843 else if (is_power9_pairable_vec_type (type2)
17844 && vec_pos == -1)
17845 /* Remember position of first vector insn seen. */
17846 vec_pos = pos;
17848 pos--;
17850 if (vec_pos >= 0)
17852 /* Didn't find a vecload to pair with but did find a vector
17853 insn, move it to the end of the ready list. */
17854 tmp = ready[vec_pos];
17855 for (i = vec_pos; i < lastpos; i++)
17856 ready[i] = ready[i + 1];
17857 ready[lastpos] = tmp;
17858 vec_pairing = 1;
17859 return cached_can_issue_more;
17864 /* We've either finished a vec/vecload pair, couldn't find an insn to
17865 continue the current pair, or the last insn had nothing to do with
17866 with pairing. In any case, reset the state. */
17867 vec_pairing = 0;
17870 return cached_can_issue_more;
17873 /* We are about to begin issuing insns for this clock cycle. */
17875 static int
17876 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
17877 rtx_insn **ready ATTRIBUTE_UNUSED,
17878 int *pn_ready ATTRIBUTE_UNUSED,
17879 int clock_var ATTRIBUTE_UNUSED)
17881 int n_ready = *pn_ready;
17883 if (sched_verbose)
17884 fprintf (dump, "// rs6000_sched_reorder :\n");
17886 /* Reorder the ready list, if the second to last ready insn
17887 is a nonepipeline insn. */
17888 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
17890 if (is_nonpipeline_insn (ready[n_ready - 1])
17891 && (recog_memoized (ready[n_ready - 2]) > 0))
17892 /* Simply swap first two insns. */
17893 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
17896 if (rs6000_tune == PROCESSOR_POWER6)
17897 load_store_pendulum = 0;
17899 return rs6000_issue_rate ();
17902 /* Like rs6000_sched_reorder, but called after issuing each insn. */
17904 static int
17905 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
17906 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
17908 if (sched_verbose)
17909 fprintf (dump, "// rs6000_sched_reorder2 :\n");
17911 /* For Power6, we need to handle some special cases to try and keep the
17912 store queue from overflowing and triggering expensive flushes.
17914 This code monitors how load and store instructions are being issued
17915 and skews the ready list one way or the other to increase the likelihood
17916 that a desired instruction is issued at the proper time.
17918 A couple of things are done. First, we maintain a "load_store_pendulum"
17919 to track the current state of load/store issue.
17921 - If the pendulum is at zero, then no loads or stores have been
17922 issued in the current cycle so we do nothing.
17924 - If the pendulum is 1, then a single load has been issued in this
17925 cycle and we attempt to locate another load in the ready list to
17926 issue with it.
17928 - If the pendulum is -2, then two stores have already been
17929 issued in this cycle, so we increase the priority of the first load
17930 in the ready list to increase it's likelihood of being chosen first
17931 in the next cycle.
17933 - If the pendulum is -1, then a single store has been issued in this
17934 cycle and we attempt to locate another store in the ready list to
17935 issue with it, preferring a store to an adjacent memory location to
17936 facilitate store pairing in the store queue.
17938 - If the pendulum is 2, then two loads have already been
17939 issued in this cycle, so we increase the priority of the first store
17940 in the ready list to increase it's likelihood of being chosen first
17941 in the next cycle.
17943 - If the pendulum < -2 or > 2, then do nothing.
17945 Note: This code covers the most common scenarios. There exist non
17946 load/store instructions which make use of the LSU and which
17947 would need to be accounted for to strictly model the behavior
17948 of the machine. Those instructions are currently unaccounted
17949 for to help minimize compile time overhead of this code.
17951 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
17953 int pos;
17954 int i;
17955 rtx_insn *tmp;
17956 rtx load_mem, str_mem;
17958 if (is_store_insn (last_scheduled_insn, &str_mem))
17959 /* Issuing a store, swing the load_store_pendulum to the left */
17960 load_store_pendulum--;
17961 else if (is_load_insn (last_scheduled_insn, &load_mem))
17962 /* Issuing a load, swing the load_store_pendulum to the right */
17963 load_store_pendulum++;
17964 else
17965 return cached_can_issue_more;
17967 /* If the pendulum is balanced, or there is only one instruction on
17968 the ready list, then all is well, so return. */
17969 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
17970 return cached_can_issue_more;
17972 if (load_store_pendulum == 1)
17974 /* A load has been issued in this cycle. Scan the ready list
17975 for another load to issue with it */
17976 pos = *pn_ready-1;
17978 while (pos >= 0)
17980 if (is_load_insn (ready[pos], &load_mem))
17982 /* Found a load. Move it to the head of the ready list,
17983 and adjust it's priority so that it is more likely to
17984 stay there */
17985 tmp = ready[pos];
17986 for (i=pos; i<*pn_ready-1; i++)
17987 ready[i] = ready[i + 1];
17988 ready[*pn_ready-1] = tmp;
17990 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
17991 INSN_PRIORITY (tmp)++;
17992 break;
17994 pos--;
17997 else if (load_store_pendulum == -2)
17999 /* Two stores have been issued in this cycle. Increase the
18000 priority of the first load in the ready list to favor it for
18001 issuing in the next cycle. */
18002 pos = *pn_ready-1;
18004 while (pos >= 0)
18006 if (is_load_insn (ready[pos], &load_mem)
18007 && !sel_sched_p ()
18008 && INSN_PRIORITY_KNOWN (ready[pos]))
18010 INSN_PRIORITY (ready[pos])++;
18012 /* Adjust the pendulum to account for the fact that a load
18013 was found and increased in priority. This is to prevent
18014 increasing the priority of multiple loads */
18015 load_store_pendulum--;
18017 break;
18019 pos--;
18022 else if (load_store_pendulum == -1)
18024 /* A store has been issued in this cycle. Scan the ready list for
18025 another store to issue with it, preferring a store to an adjacent
18026 memory location */
18027 int first_store_pos = -1;
18029 pos = *pn_ready-1;
18031 while (pos >= 0)
18033 if (is_store_insn (ready[pos], &str_mem))
18035 rtx str_mem2;
18036 /* Maintain the index of the first store found on the
18037 list */
18038 if (first_store_pos == -1)
18039 first_store_pos = pos;
18041 if (is_store_insn (last_scheduled_insn, &str_mem2)
18042 && adjacent_mem_locations (str_mem, str_mem2))
18044 /* Found an adjacent store. Move it to the head of the
18045 ready list, and adjust it's priority so that it is
18046 more likely to stay there */
18047 tmp = ready[pos];
18048 for (i=pos; i<*pn_ready-1; i++)
18049 ready[i] = ready[i + 1];
18050 ready[*pn_ready-1] = tmp;
18052 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
18053 INSN_PRIORITY (tmp)++;
18055 first_store_pos = -1;
18057 break;
18060 pos--;
18063 if (first_store_pos >= 0)
18065 /* An adjacent store wasn't found, but a non-adjacent store was,
18066 so move the non-adjacent store to the front of the ready
18067 list, and adjust its priority so that it is more likely to
18068 stay there. */
18069 tmp = ready[first_store_pos];
18070 for (i=first_store_pos; i<*pn_ready-1; i++)
18071 ready[i] = ready[i + 1];
18072 ready[*pn_ready-1] = tmp;
18073 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
18074 INSN_PRIORITY (tmp)++;
18077 else if (load_store_pendulum == 2)
18079 /* Two loads have been issued in this cycle. Increase the priority
18080 of the first store in the ready list to favor it for issuing in
18081 the next cycle. */
18082 pos = *pn_ready-1;
18084 while (pos >= 0)
18086 if (is_store_insn (ready[pos], &str_mem)
18087 && !sel_sched_p ()
18088 && INSN_PRIORITY_KNOWN (ready[pos]))
18090 INSN_PRIORITY (ready[pos])++;
18092 /* Adjust the pendulum to account for the fact that a store
18093 was found and increased in priority. This is to prevent
18094 increasing the priority of multiple stores */
18095 load_store_pendulum++;
18097 break;
18099 pos--;
18104 /* Do Power9 dependent reordering if necessary. */
18105 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
18106 && recog_memoized (last_scheduled_insn) >= 0)
18107 return power9_sched_reorder2 (ready, *pn_ready - 1);
18109 return cached_can_issue_more;
18112 /* Return whether the presence of INSN causes a dispatch group termination
18113 of group WHICH_GROUP.
18115 If WHICH_GROUP == current_group, this function will return true if INSN
18116 causes the termination of the current group (i.e, the dispatch group to
18117 which INSN belongs). This means that INSN will be the last insn in the
18118 group it belongs to.
18120 If WHICH_GROUP == previous_group, this function will return true if INSN
18121 causes the termination of the previous group (i.e, the dispatch group that
18122 precedes the group to which INSN belongs). This means that INSN will be
18123 the first insn in the group it belongs to). */
18125 static bool
18126 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
18128 bool first, last;
18130 if (! insn)
18131 return false;
18133 first = insn_must_be_first_in_group (insn);
18134 last = insn_must_be_last_in_group (insn);
18136 if (first && last)
18137 return true;
18139 if (which_group == current_group)
18140 return last;
18141 else if (which_group == previous_group)
18142 return first;
18144 return false;
18148 static bool
18149 insn_must_be_first_in_group (rtx_insn *insn)
18151 enum attr_type type;
18153 if (!insn
18154 || NOTE_P (insn)
18155 || DEBUG_INSN_P (insn)
18156 || GET_CODE (PATTERN (insn)) == USE
18157 || GET_CODE (PATTERN (insn)) == CLOBBER)
18158 return false;
18160 switch (rs6000_tune)
18162 case PROCESSOR_POWER5:
18163 if (is_cracked_insn (insn))
18164 return true;
18165 /* FALLTHRU */
18166 case PROCESSOR_POWER4:
18167 if (is_microcoded_insn (insn))
18168 return true;
18170 if (!rs6000_sched_groups)
18171 return false;
18173 type = get_attr_type (insn);
18175 switch (type)
18177 case TYPE_MFCR:
18178 case TYPE_MFCRF:
18179 case TYPE_MTCR:
18180 case TYPE_CR_LOGICAL:
18181 case TYPE_MTJMPR:
18182 case TYPE_MFJMPR:
18183 case TYPE_DIV:
18184 case TYPE_LOAD_L:
18185 case TYPE_STORE_C:
18186 case TYPE_ISYNC:
18187 case TYPE_SYNC:
18188 return true;
18189 default:
18190 break;
18192 break;
18193 case PROCESSOR_POWER6:
18194 type = get_attr_type (insn);
18196 switch (type)
18198 case TYPE_EXTS:
18199 case TYPE_CNTLZ:
18200 case TYPE_TRAP:
18201 case TYPE_MUL:
18202 case TYPE_INSERT:
18203 case TYPE_FPCOMPARE:
18204 case TYPE_MFCR:
18205 case TYPE_MTCR:
18206 case TYPE_MFJMPR:
18207 case TYPE_MTJMPR:
18208 case TYPE_ISYNC:
18209 case TYPE_SYNC:
18210 case TYPE_LOAD_L:
18211 case TYPE_STORE_C:
18212 return true;
18213 case TYPE_SHIFT:
18214 if (get_attr_dot (insn) == DOT_NO
18215 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18216 return true;
18217 else
18218 break;
18219 case TYPE_DIV:
18220 if (get_attr_size (insn) == SIZE_32)
18221 return true;
18222 else
18223 break;
18224 case TYPE_LOAD:
18225 case TYPE_STORE:
18226 case TYPE_FPLOAD:
18227 case TYPE_FPSTORE:
18228 if (get_attr_update (insn) == UPDATE_YES)
18229 return true;
18230 else
18231 break;
18232 default:
18233 break;
18235 break;
18236 case PROCESSOR_POWER7:
18237 type = get_attr_type (insn);
18239 switch (type)
18241 case TYPE_CR_LOGICAL:
18242 case TYPE_MFCR:
18243 case TYPE_MFCRF:
18244 case TYPE_MTCR:
18245 case TYPE_DIV:
18246 case TYPE_ISYNC:
18247 case TYPE_LOAD_L:
18248 case TYPE_STORE_C:
18249 case TYPE_MFJMPR:
18250 case TYPE_MTJMPR:
18251 return true;
18252 case TYPE_MUL:
18253 case TYPE_SHIFT:
18254 case TYPE_EXTS:
18255 if (get_attr_dot (insn) == DOT_YES)
18256 return true;
18257 else
18258 break;
18259 case TYPE_LOAD:
18260 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18261 || get_attr_update (insn) == UPDATE_YES)
18262 return true;
18263 else
18264 break;
18265 case TYPE_STORE:
18266 case TYPE_FPLOAD:
18267 case TYPE_FPSTORE:
18268 if (get_attr_update (insn) == UPDATE_YES)
18269 return true;
18270 else
18271 break;
18272 default:
18273 break;
18275 break;
18276 case PROCESSOR_POWER8:
18277 type = get_attr_type (insn);
18279 switch (type)
18281 case TYPE_CR_LOGICAL:
18282 case TYPE_MFCR:
18283 case TYPE_MFCRF:
18284 case TYPE_MTCR:
18285 case TYPE_SYNC:
18286 case TYPE_ISYNC:
18287 case TYPE_LOAD_L:
18288 case TYPE_STORE_C:
18289 case TYPE_VECSTORE:
18290 case TYPE_MFJMPR:
18291 case TYPE_MTJMPR:
18292 return true;
18293 case TYPE_SHIFT:
18294 case TYPE_EXTS:
18295 case TYPE_MUL:
18296 if (get_attr_dot (insn) == DOT_YES)
18297 return true;
18298 else
18299 break;
18300 case TYPE_LOAD:
18301 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18302 || get_attr_update (insn) == UPDATE_YES)
18303 return true;
18304 else
18305 break;
18306 case TYPE_STORE:
18307 if (get_attr_update (insn) == UPDATE_YES
18308 && get_attr_indexed (insn) == INDEXED_YES)
18309 return true;
18310 else
18311 break;
18312 default:
18313 break;
18315 break;
18316 default:
18317 break;
18320 return false;
18323 static bool
18324 insn_must_be_last_in_group (rtx_insn *insn)
18326 enum attr_type type;
18328 if (!insn
18329 || NOTE_P (insn)
18330 || DEBUG_INSN_P (insn)
18331 || GET_CODE (PATTERN (insn)) == USE
18332 || GET_CODE (PATTERN (insn)) == CLOBBER)
18333 return false;
18335 switch (rs6000_tune) {
18336 case PROCESSOR_POWER4:
18337 case PROCESSOR_POWER5:
18338 if (is_microcoded_insn (insn))
18339 return true;
18341 if (is_branch_slot_insn (insn))
18342 return true;
18344 break;
18345 case PROCESSOR_POWER6:
18346 type = get_attr_type (insn);
18348 switch (type)
18350 case TYPE_EXTS:
18351 case TYPE_CNTLZ:
18352 case TYPE_TRAP:
18353 case TYPE_MUL:
18354 case TYPE_FPCOMPARE:
18355 case TYPE_MFCR:
18356 case TYPE_MTCR:
18357 case TYPE_MFJMPR:
18358 case TYPE_MTJMPR:
18359 case TYPE_ISYNC:
18360 case TYPE_SYNC:
18361 case TYPE_LOAD_L:
18362 case TYPE_STORE_C:
18363 return true;
18364 case TYPE_SHIFT:
18365 if (get_attr_dot (insn) == DOT_NO
18366 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18367 return true;
18368 else
18369 break;
18370 case TYPE_DIV:
18371 if (get_attr_size (insn) == SIZE_32)
18372 return true;
18373 else
18374 break;
18375 default:
18376 break;
18378 break;
18379 case PROCESSOR_POWER7:
18380 type = get_attr_type (insn);
18382 switch (type)
18384 case TYPE_ISYNC:
18385 case TYPE_SYNC:
18386 case TYPE_LOAD_L:
18387 case TYPE_STORE_C:
18388 return true;
18389 case TYPE_LOAD:
18390 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18391 && get_attr_update (insn) == UPDATE_YES)
18392 return true;
18393 else
18394 break;
18395 case TYPE_STORE:
18396 if (get_attr_update (insn) == UPDATE_YES
18397 && get_attr_indexed (insn) == INDEXED_YES)
18398 return true;
18399 else
18400 break;
18401 default:
18402 break;
18404 break;
18405 case PROCESSOR_POWER8:
18406 type = get_attr_type (insn);
18408 switch (type)
18410 case TYPE_MFCR:
18411 case TYPE_MTCR:
18412 case TYPE_ISYNC:
18413 case TYPE_SYNC:
18414 case TYPE_LOAD_L:
18415 case TYPE_STORE_C:
18416 return true;
18417 case TYPE_LOAD:
18418 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18419 && get_attr_update (insn) == UPDATE_YES)
18420 return true;
18421 else
18422 break;
18423 case TYPE_STORE:
18424 if (get_attr_update (insn) == UPDATE_YES
18425 && get_attr_indexed (insn) == INDEXED_YES)
18426 return true;
18427 else
18428 break;
18429 default:
18430 break;
18432 break;
18433 default:
18434 break;
18437 return false;
18440 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
18441 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
18443 static bool
18444 is_costly_group (rtx *group_insns, rtx next_insn)
18446 int i;
18447 int issue_rate = rs6000_issue_rate ();
18449 for (i = 0; i < issue_rate; i++)
18451 sd_iterator_def sd_it;
18452 dep_t dep;
18453 rtx insn = group_insns[i];
18455 if (!insn)
18456 continue;
18458 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
18460 rtx next = DEP_CON (dep);
18462 if (next == next_insn
18463 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
18464 return true;
18468 return false;
18471 /* Utility of the function redefine_groups.
18472 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
18473 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
18474 to keep it "far" (in a separate group) from GROUP_INSNS, following
18475 one of the following schemes, depending on the value of the flag
18476 -minsert_sched_nops = X:
18477 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
18478 in order to force NEXT_INSN into a separate group.
18479 (2) X < sched_finish_regroup_exact: insert exactly X nops.
18480 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
18481 insertion (has a group just ended, how many vacant issue slots remain in the
18482 last group, and how many dispatch groups were encountered so far). */
18484 static int
18485 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
18486 rtx_insn *next_insn, bool *group_end, int can_issue_more,
18487 int *group_count)
18489 rtx nop;
18490 bool force;
18491 int issue_rate = rs6000_issue_rate ();
18492 bool end = *group_end;
18493 int i;
18495 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
18496 return can_issue_more;
18498 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
18499 return can_issue_more;
18501 force = is_costly_group (group_insns, next_insn);
18502 if (!force)
18503 return can_issue_more;
18505 if (sched_verbose > 6)
18506 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
18507 *group_count ,can_issue_more);
18509 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
18511 if (*group_end)
18512 can_issue_more = 0;
18514 /* Since only a branch can be issued in the last issue_slot, it is
18515 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
18516 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
18517 in this case the last nop will start a new group and the branch
18518 will be forced to the new group. */
18519 if (can_issue_more && !is_branch_slot_insn (next_insn))
18520 can_issue_more--;
18522 /* Do we have a special group ending nop? */
18523 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
18524 || rs6000_tune == PROCESSOR_POWER8)
18526 nop = gen_group_ending_nop ();
18527 emit_insn_before (nop, next_insn);
18528 can_issue_more = 0;
18530 else
18531 while (can_issue_more > 0)
18533 nop = gen_nop ();
18534 emit_insn_before (nop, next_insn);
18535 can_issue_more--;
18538 *group_end = true;
18539 return 0;
18542 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
18544 int n_nops = rs6000_sched_insert_nops;
18546 /* Nops can't be issued from the branch slot, so the effective
18547 issue_rate for nops is 'issue_rate - 1'. */
18548 if (can_issue_more == 0)
18549 can_issue_more = issue_rate;
18550 can_issue_more--;
18551 if (can_issue_more == 0)
18553 can_issue_more = issue_rate - 1;
18554 (*group_count)++;
18555 end = true;
18556 for (i = 0; i < issue_rate; i++)
18558 group_insns[i] = 0;
18562 while (n_nops > 0)
18564 nop = gen_nop ();
18565 emit_insn_before (nop, next_insn);
18566 if (can_issue_more == issue_rate - 1) /* new group begins */
18567 end = false;
18568 can_issue_more--;
18569 if (can_issue_more == 0)
18571 can_issue_more = issue_rate - 1;
18572 (*group_count)++;
18573 end = true;
18574 for (i = 0; i < issue_rate; i++)
18576 group_insns[i] = 0;
18579 n_nops--;
18582 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
18583 can_issue_more++;
18585 /* Is next_insn going to start a new group? */
18586 *group_end
18587 = (end
18588 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
18589 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
18590 || (can_issue_more < issue_rate &&
18591 insn_terminates_group_p (next_insn, previous_group)));
18592 if (*group_end && end)
18593 (*group_count)--;
18595 if (sched_verbose > 6)
18596 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
18597 *group_count, can_issue_more);
18598 return can_issue_more;
18601 return can_issue_more;
18604 /* This function tries to synch the dispatch groups that the compiler "sees"
18605 with the dispatch groups that the processor dispatcher is expected to
18606 form in practice. It tries to achieve this synchronization by forcing the
18607 estimated processor grouping on the compiler (as opposed to the function
18608 'pad_goups' which tries to force the scheduler's grouping on the processor).
18610 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
18611 examines the (estimated) dispatch groups that will be formed by the processor
18612 dispatcher. It marks these group boundaries to reflect the estimated
18613 processor grouping, overriding the grouping that the scheduler had marked.
18614 Depending on the value of the flag '-minsert-sched-nops' this function can
18615 force certain insns into separate groups or force a certain distance between
18616 them by inserting nops, for example, if there exists a "costly dependence"
18617 between the insns.
18619 The function estimates the group boundaries that the processor will form as
18620 follows: It keeps track of how many vacant issue slots are available after
18621 each insn. A subsequent insn will start a new group if one of the following
18622 4 cases applies:
18623 - no more vacant issue slots remain in the current dispatch group.
18624 - only the last issue slot, which is the branch slot, is vacant, but the next
18625 insn is not a branch.
18626 - only the last 2 or less issue slots, including the branch slot, are vacant,
18627 which means that a cracked insn (which occupies two issue slots) can't be
18628 issued in this group.
18629 - less than 'issue_rate' slots are vacant, and the next insn always needs to
18630 start a new group. */
18632 static int
18633 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
18634 rtx_insn *tail)
18636 rtx_insn *insn, *next_insn;
18637 int issue_rate;
18638 int can_issue_more;
18639 int slot, i;
18640 bool group_end;
18641 int group_count = 0;
18642 rtx *group_insns;
18644 /* Initialize. */
18645 issue_rate = rs6000_issue_rate ();
18646 group_insns = XALLOCAVEC (rtx, issue_rate);
18647 for (i = 0; i < issue_rate; i++)
18649 group_insns[i] = 0;
18651 can_issue_more = issue_rate;
18652 slot = 0;
18653 insn = get_next_active_insn (prev_head_insn, tail);
18654 group_end = false;
18656 while (insn != NULL_RTX)
18658 slot = (issue_rate - can_issue_more);
18659 group_insns[slot] = insn;
18660 can_issue_more =
18661 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
18662 if (insn_terminates_group_p (insn, current_group))
18663 can_issue_more = 0;
18665 next_insn = get_next_active_insn (insn, tail);
18666 if (next_insn == NULL_RTX)
18667 return group_count + 1;
18669 /* Is next_insn going to start a new group? */
18670 group_end
18671 = (can_issue_more == 0
18672 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
18673 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
18674 || (can_issue_more < issue_rate &&
18675 insn_terminates_group_p (next_insn, previous_group)));
18677 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
18678 next_insn, &group_end, can_issue_more,
18679 &group_count);
18681 if (group_end)
18683 group_count++;
18684 can_issue_more = 0;
18685 for (i = 0; i < issue_rate; i++)
18687 group_insns[i] = 0;
18691 if (GET_MODE (next_insn) == TImode && can_issue_more)
18692 PUT_MODE (next_insn, VOIDmode);
18693 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
18694 PUT_MODE (next_insn, TImode);
18696 insn = next_insn;
18697 if (can_issue_more == 0)
18698 can_issue_more = issue_rate;
18699 } /* while */
18701 return group_count;
18704 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
18705 dispatch group boundaries that the scheduler had marked. Pad with nops
18706 any dispatch groups which have vacant issue slots, in order to force the
18707 scheduler's grouping on the processor dispatcher. The function
18708 returns the number of dispatch groups found. */
18710 static int
18711 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
18712 rtx_insn *tail)
18714 rtx_insn *insn, *next_insn;
18715 rtx nop;
18716 int issue_rate;
18717 int can_issue_more;
18718 int group_end;
18719 int group_count = 0;
18721 /* Initialize issue_rate. */
18722 issue_rate = rs6000_issue_rate ();
18723 can_issue_more = issue_rate;
18725 insn = get_next_active_insn (prev_head_insn, tail);
18726 next_insn = get_next_active_insn (insn, tail);
18728 while (insn != NULL_RTX)
18730 can_issue_more =
18731 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
18733 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
18735 if (next_insn == NULL_RTX)
18736 break;
18738 if (group_end)
18740 /* If the scheduler had marked group termination at this location
18741 (between insn and next_insn), and neither insn nor next_insn will
18742 force group termination, pad the group with nops to force group
18743 termination. */
18744 if (can_issue_more
18745 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
18746 && !insn_terminates_group_p (insn, current_group)
18747 && !insn_terminates_group_p (next_insn, previous_group))
18749 if (!is_branch_slot_insn (next_insn))
18750 can_issue_more--;
18752 while (can_issue_more)
18754 nop = gen_nop ();
18755 emit_insn_before (nop, next_insn);
18756 can_issue_more--;
18760 can_issue_more = issue_rate;
18761 group_count++;
18764 insn = next_insn;
18765 next_insn = get_next_active_insn (insn, tail);
18768 return group_count;
18771 /* We're beginning a new block. Initialize data structures as necessary. */
18773 static void
18774 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
18775 int sched_verbose ATTRIBUTE_UNUSED,
18776 int max_ready ATTRIBUTE_UNUSED)
18778 last_scheduled_insn = NULL;
18779 load_store_pendulum = 0;
18780 divide_cnt = 0;
18781 vec_pairing = 0;
18784 /* The following function is called at the end of scheduling BB.
18785 After reload, it inserts nops at insn group bundling. */
18787 static void
18788 rs6000_sched_finish (FILE *dump, int sched_verbose)
18790 int n_groups;
18792 if (sched_verbose)
18793 fprintf (dump, "=== Finishing schedule.\n");
18795 if (reload_completed && rs6000_sched_groups)
18797 /* Do not run sched_finish hook when selective scheduling enabled. */
18798 if (sel_sched_p ())
18799 return;
18801 if (rs6000_sched_insert_nops == sched_finish_none)
18802 return;
18804 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
18805 n_groups = pad_groups (dump, sched_verbose,
18806 current_sched_info->prev_head,
18807 current_sched_info->next_tail);
18808 else
18809 n_groups = redefine_groups (dump, sched_verbose,
18810 current_sched_info->prev_head,
18811 current_sched_info->next_tail);
18813 if (sched_verbose >= 6)
18815 fprintf (dump, "ngroups = %d\n", n_groups);
18816 print_rtl (dump, current_sched_info->prev_head);
18817 fprintf (dump, "Done finish_sched\n");
18822 struct rs6000_sched_context
18824 short cached_can_issue_more;
18825 rtx_insn *last_scheduled_insn;
18826 int load_store_pendulum;
18827 int divide_cnt;
18828 int vec_pairing;
18831 typedef struct rs6000_sched_context rs6000_sched_context_def;
18832 typedef rs6000_sched_context_def *rs6000_sched_context_t;
18834 /* Allocate store for new scheduling context. */
18835 static void *
18836 rs6000_alloc_sched_context (void)
18838 return xmalloc (sizeof (rs6000_sched_context_def));
18841 /* If CLEAN_P is true then initializes _SC with clean data,
18842 and from the global context otherwise. */
18843 static void
18844 rs6000_init_sched_context (void *_sc, bool clean_p)
18846 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
18848 if (clean_p)
18850 sc->cached_can_issue_more = 0;
18851 sc->last_scheduled_insn = NULL;
18852 sc->load_store_pendulum = 0;
18853 sc->divide_cnt = 0;
18854 sc->vec_pairing = 0;
18856 else
18858 sc->cached_can_issue_more = cached_can_issue_more;
18859 sc->last_scheduled_insn = last_scheduled_insn;
18860 sc->load_store_pendulum = load_store_pendulum;
18861 sc->divide_cnt = divide_cnt;
18862 sc->vec_pairing = vec_pairing;
18866 /* Sets the global scheduling context to the one pointed to by _SC. */
18867 static void
18868 rs6000_set_sched_context (void *_sc)
18870 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
18872 gcc_assert (sc != NULL);
18874 cached_can_issue_more = sc->cached_can_issue_more;
18875 last_scheduled_insn = sc->last_scheduled_insn;
18876 load_store_pendulum = sc->load_store_pendulum;
18877 divide_cnt = sc->divide_cnt;
18878 vec_pairing = sc->vec_pairing;
18881 /* Free _SC. */
18882 static void
18883 rs6000_free_sched_context (void *_sc)
18885 gcc_assert (_sc != NULL);
18887 free (_sc);
18890 static bool
18891 rs6000_sched_can_speculate_insn (rtx_insn *insn)
18893 switch (get_attr_type (insn))
18895 case TYPE_DIV:
18896 case TYPE_SDIV:
18897 case TYPE_DDIV:
18898 case TYPE_VECDIV:
18899 case TYPE_SSQRT:
18900 case TYPE_DSQRT:
18901 return false;
18903 default:
18904 return true;
18908 /* Length in units of the trampoline for entering a nested function. */
18911 rs6000_trampoline_size (void)
18913 int ret = 0;
18915 switch (DEFAULT_ABI)
18917 default:
18918 gcc_unreachable ();
18920 case ABI_AIX:
18921 ret = (TARGET_32BIT) ? 12 : 24;
18922 break;
18924 case ABI_ELFv2:
18925 gcc_assert (!TARGET_32BIT);
18926 ret = 32;
18927 break;
18929 case ABI_DARWIN:
18930 case ABI_V4:
18931 ret = (TARGET_32BIT) ? 40 : 48;
18932 break;
18935 return ret;
18938 /* Emit RTL insns to initialize the variable parts of a trampoline.
18939 FNADDR is an RTX for the address of the function's pure code.
18940 CXT is an RTX for the static chain value for the function. */
18942 static void
18943 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
18945 int regsize = (TARGET_32BIT) ? 4 : 8;
18946 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
18947 rtx ctx_reg = force_reg (Pmode, cxt);
18948 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
18950 switch (DEFAULT_ABI)
18952 default:
18953 gcc_unreachable ();
18955 /* Under AIX, just build the 3 word function descriptor */
18956 case ABI_AIX:
18958 rtx fnmem, fn_reg, toc_reg;
18960 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
18961 error ("you cannot take the address of a nested function if you use "
18962 "the %qs option", "-mno-pointers-to-nested-functions");
18964 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
18965 fn_reg = gen_reg_rtx (Pmode);
18966 toc_reg = gen_reg_rtx (Pmode);
18968 /* Macro to shorten the code expansions below. */
18969 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
18971 m_tramp = replace_equiv_address (m_tramp, addr);
18973 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
18974 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
18975 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
18976 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
18977 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
18979 # undef MEM_PLUS
18981 break;
18983 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
18984 case ABI_ELFv2:
18985 case ABI_DARWIN:
18986 case ABI_V4:
18987 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
18988 LCT_NORMAL, VOIDmode,
18989 addr, Pmode,
18990 GEN_INT (rs6000_trampoline_size ()), SImode,
18991 fnaddr, Pmode,
18992 ctx_reg, Pmode);
18993 break;
18998 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
18999 identifier as an argument, so the front end shouldn't look it up. */
19001 static bool
19002 rs6000_attribute_takes_identifier_p (const_tree attr_id)
19004 return is_attribute_p ("altivec", attr_id);
19007 /* Handle the "altivec" attribute. The attribute may have
19008 arguments as follows:
19010 __attribute__((altivec(vector__)))
19011 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
19012 __attribute__((altivec(bool__))) (always followed by 'unsigned')
19014 and may appear more than once (e.g., 'vector bool char') in a
19015 given declaration. */
19017 static tree
19018 rs6000_handle_altivec_attribute (tree *node,
19019 tree name ATTRIBUTE_UNUSED,
19020 tree args,
19021 int flags ATTRIBUTE_UNUSED,
19022 bool *no_add_attrs)
19024 tree type = *node, result = NULL_TREE;
19025 machine_mode mode;
19026 int unsigned_p;
19027 char altivec_type
19028 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
19029 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
19030 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
19031 : '?');
19033 while (POINTER_TYPE_P (type)
19034 || TREE_CODE (type) == FUNCTION_TYPE
19035 || TREE_CODE (type) == METHOD_TYPE
19036 || TREE_CODE (type) == ARRAY_TYPE)
19037 type = TREE_TYPE (type);
19039 mode = TYPE_MODE (type);
19041 /* Check for invalid AltiVec type qualifiers. */
19042 if (type == long_double_type_node)
19043 error ("use of %<long double%> in AltiVec types is invalid");
19044 else if (type == boolean_type_node)
19045 error ("use of boolean types in AltiVec types is invalid");
19046 else if (TREE_CODE (type) == COMPLEX_TYPE)
19047 error ("use of %<complex%> in AltiVec types is invalid");
19048 else if (DECIMAL_FLOAT_MODE_P (mode))
19049 error ("use of decimal floating point types in AltiVec types is invalid");
19050 else if (!TARGET_VSX)
19052 if (type == long_unsigned_type_node || type == long_integer_type_node)
19054 if (TARGET_64BIT)
19055 error ("use of %<long%> in AltiVec types is invalid for "
19056 "64-bit code without %qs", "-mvsx");
19057 else if (rs6000_warn_altivec_long)
19058 warning (0, "use of %<long%> in AltiVec types is deprecated; "
19059 "use %<int%>");
19061 else if (type == long_long_unsigned_type_node
19062 || type == long_long_integer_type_node)
19063 error ("use of %<long long%> in AltiVec types is invalid without %qs",
19064 "-mvsx");
19065 else if (type == double_type_node)
19066 error ("use of %<double%> in AltiVec types is invalid without %qs",
19067 "-mvsx");
19070 switch (altivec_type)
19072 case 'v':
19073 unsigned_p = TYPE_UNSIGNED (type);
19074 switch (mode)
19076 case E_TImode:
19077 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
19078 break;
19079 case E_DImode:
19080 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
19081 break;
19082 case E_SImode:
19083 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
19084 break;
19085 case E_HImode:
19086 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
19087 break;
19088 case E_QImode:
19089 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
19090 break;
19091 case E_SFmode: result = V4SF_type_node; break;
19092 case E_DFmode: result = V2DF_type_node; break;
19093 /* If the user says 'vector int bool', we may be handed the 'bool'
19094 attribute _before_ the 'vector' attribute, and so select the
19095 proper type in the 'b' case below. */
19096 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
19097 case E_V2DImode: case E_V2DFmode:
19098 result = type;
19099 default: break;
19101 break;
19102 case 'b':
19103 switch (mode)
19105 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
19106 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
19107 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
19108 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
19109 default: break;
19111 break;
19112 case 'p':
19113 switch (mode)
19115 case E_V8HImode: result = pixel_V8HI_type_node;
19116 default: break;
19118 default: break;
19121 /* Propagate qualifiers attached to the element type
19122 onto the vector type. */
19123 if (result && result != type && TYPE_QUALS (type))
19124 result = build_qualified_type (result, TYPE_QUALS (type));
19126 *no_add_attrs = true; /* No need to hang on to the attribute. */
19128 if (result)
19129 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
19131 return NULL_TREE;
19134 /* AltiVec defines five built-in scalar types that serve as vector
19135 elements; we must teach the compiler how to mangle them. The 128-bit
19136 floating point mangling is target-specific as well. */
19138 static const char *
19139 rs6000_mangle_type (const_tree type)
19141 type = TYPE_MAIN_VARIANT (type);
19143 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
19144 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
19145 return NULL;
19147 if (type == bool_char_type_node) return "U6__boolc";
19148 if (type == bool_short_type_node) return "U6__bools";
19149 if (type == pixel_type_node) return "u7__pixel";
19150 if (type == bool_int_type_node) return "U6__booli";
19151 if (type == bool_long_long_type_node) return "U6__boolx";
19153 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
19154 return "g";
19155 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
19156 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
19158 /* For all other types, use the default mangling. */
19159 return NULL;
19162 /* Handle a "longcall" or "shortcall" attribute; arguments as in
19163 struct attribute_spec.handler. */
19165 static tree
19166 rs6000_handle_longcall_attribute (tree *node, tree name,
19167 tree args ATTRIBUTE_UNUSED,
19168 int flags ATTRIBUTE_UNUSED,
19169 bool *no_add_attrs)
19171 if (TREE_CODE (*node) != FUNCTION_TYPE
19172 && TREE_CODE (*node) != FIELD_DECL
19173 && TREE_CODE (*node) != TYPE_DECL)
19175 warning (OPT_Wattributes, "%qE attribute only applies to functions",
19176 name);
19177 *no_add_attrs = true;
19180 return NULL_TREE;
19183 /* Set longcall attributes on all functions declared when
19184 rs6000_default_long_calls is true. */
19185 static void
19186 rs6000_set_default_type_attributes (tree type)
19188 if (rs6000_default_long_calls
19189 && (TREE_CODE (type) == FUNCTION_TYPE
19190 || TREE_CODE (type) == METHOD_TYPE))
19191 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
19192 NULL_TREE,
19193 TYPE_ATTRIBUTES (type));
19195 #if TARGET_MACHO
19196 darwin_set_default_type_attributes (type);
19197 #endif
19200 /* Return a reference suitable for calling a function with the
19201 longcall attribute. */
19203 static rtx
19204 rs6000_longcall_ref (rtx call_ref, rtx arg)
19206 /* System V adds '.' to the internal name, so skip them. */
19207 const char *call_name = XSTR (call_ref, 0);
19208 if (*call_name == '.')
19210 while (*call_name == '.')
19211 call_name++;
19213 tree node = get_identifier (call_name);
19214 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
19217 if (TARGET_PLTSEQ)
19219 rtx base = const0_rtx;
19220 int regno = 12;
19221 if (rs6000_pcrel_p (cfun))
19223 rtx reg = gen_rtx_REG (Pmode, regno);
19224 rtx u = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
19225 UNSPEC_PLT_PCREL);
19226 emit_insn (gen_rtx_SET (reg, u));
19227 return reg;
19230 if (DEFAULT_ABI == ABI_ELFv2)
19231 base = gen_rtx_REG (Pmode, TOC_REGISTER);
19232 else
19234 if (flag_pic)
19235 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
19236 regno = 11;
19238 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
19239 may be used by a function global entry point. For SysV4, r11
19240 is used by __glink_PLTresolve lazy resolver entry. */
19241 rtx reg = gen_rtx_REG (Pmode, regno);
19242 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
19243 UNSPEC_PLT16_HA);
19244 rtx lo = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, reg, call_ref, arg),
19245 UNSPEC_PLT16_LO);
19246 emit_insn (gen_rtx_SET (reg, hi));
19247 emit_insn (gen_rtx_SET (reg, lo));
19248 return reg;
19251 return force_reg (Pmode, call_ref);
19254 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
19255 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
19256 #endif
19258 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19259 struct attribute_spec.handler. */
19260 static tree
19261 rs6000_handle_struct_attribute (tree *node, tree name,
19262 tree args ATTRIBUTE_UNUSED,
19263 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19265 tree *type = NULL;
19266 if (DECL_P (*node))
19268 if (TREE_CODE (*node) == TYPE_DECL)
19269 type = &TREE_TYPE (*node);
19271 else
19272 type = node;
19274 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19275 || TREE_CODE (*type) == UNION_TYPE)))
19277 warning (OPT_Wattributes, "%qE attribute ignored", name);
19278 *no_add_attrs = true;
19281 else if ((is_attribute_p ("ms_struct", name)
19282 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19283 || ((is_attribute_p ("gcc_struct", name)
19284 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19286 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
19287 name);
19288 *no_add_attrs = true;
19291 return NULL_TREE;
19294 static bool
19295 rs6000_ms_bitfield_layout_p (const_tree record_type)
19297 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
19298 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19299 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19302 #ifdef USING_ELFOS_H
/* A get_unnamed_section callback, used for switching to toc_section.  */

static void
rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
      && TARGET_MINIMAL_TOC)
    {
      /* First switch emits the real .toc plus the LCTOC anchors; later
	 switches just re-enter the minimal-TOC section.  */
      if (!toc_initialized)
	{
	  fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
	  (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
	  fprintf (asm_out_file, "\t.tc ");
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
	  fprintf (asm_out_file, "\n");

	  fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
	  /* Bias the anchor so 16-bit signed offsets reach 64K of TOC.  */
	  fprintf (asm_out_file, " = .+32768\n");
	  toc_initialized = 1;
	}
      else
	fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
    }
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    {
      fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
      if (!toc_initialized)
	{
	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
	  toc_initialized = 1;
	}
    }
  else
    {
      fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
      if (!toc_initialized)
	{
	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
	  fprintf (asm_out_file, " = .+32768\n");
	  toc_initialized = 1;
	}
    }
}
19353 /* Implement TARGET_ASM_INIT_SECTIONS. */
19355 static void
19356 rs6000_elf_asm_init_sections (void)
19358 toc_section
19359 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
19361 sdata2_section
19362 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
19363 SDATA2_SECTION_ASM_OP);
19366 /* Implement TARGET_SELECT_RTX_SECTION. */
19368 static section *
19369 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
19370 unsigned HOST_WIDE_INT align)
19372 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
19373 return toc_section;
19374 else
19375 return default_elf_select_rtx_section (mode, x, align);
19378 /* For a SYMBOL_REF, set generic flags and then perform some
19379 target-specific processing.
19381 When the AIX ABI is requested on a non-AIX system, replace the
19382 function name with the real name (with a leading .) rather than the
19383 function descriptor name. This saves a lot of overriding code to
19384 read the prefixes. */
19386 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
19387 static void
19388 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
19390 default_encode_section_info (decl, rtl, first);
19392 if (first
19393 && TREE_CODE (decl) == FUNCTION_DECL
19394 && !TARGET_AIX
19395 && DEFAULT_ABI == ABI_AIX)
19397 rtx sym_ref = XEXP (rtl, 0);
19398 size_t len = strlen (XSTR (sym_ref, 0));
19399 char *str = XALLOCAVEC (char, len + 2);
19400 str[0] = '.';
19401 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
19402 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
/* Return true if SECTION names the section TEMPL, either exactly or
   as a dot-separated subsection of it (e.g. ".sdata.foo" matches
   ".sdata", but ".sdata2" does not).  */
static inline bool
compare_section_name (const char *section, const char *templ)
{
  size_t n = strlen (templ);

  if (strncmp (section, templ, n) != 0)
    return false;

  return section[n] == '\0' || section[n] == '.';
}
/* Return true if DECL should be placed in the small data area
   (.sdata/.sbss and friends), per the -msdata mode and -G limit.  */
bool
rs6000_elf_in_small_data_p (const_tree decl)
{
  if (rs6000_sdata == SDATA_NONE)
    return false;

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (decl) == STRING_CST)
    return false;

  /* Functions are never in the small data area.  */
  if (TREE_CODE (decl) == FUNCTION_DECL)
    return false;

  /* An explicit section name decides by name alone; subsections such
     as ".sdata.foo" also count (see compare_section_name).  */
  if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
    {
      const char *section = DECL_SECTION_NAME (decl);
      if (compare_section_name (section, ".sdata")
	  || compare_section_name (section, ".sdata2")
	  || compare_section_name (section, ".gnu.linkonce.s")
	  || compare_section_name (section, ".sbss")
	  || compare_section_name (section, ".sbss2")
	  || compare_section_name (section, ".gnu.linkonce.sb")
	  || strcmp (section, ".PPC.EMB.sdata0") == 0
	  || strcmp (section, ".PPC.EMB.sbss0") == 0)
	return true;
    }
  else
    {
      /* If we are told not to put readonly data in sdata, then don't.  */
      if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
	  && !rs6000_readonly_in_sdata)
	return false;

      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));

      if (size > 0
	  && size <= g_switch_value
	  /* If it's not public, and we're not going to reference it there,
	     there's no need to put it in the small data section.  */
	  && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
	return true;
    }

  return false;
}
19463 #endif /* USING_ELFOS_H */
19465 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
19467 static bool
19468 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
19470 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
19473 /* Do not place thread-local symbols refs in the object blocks. */
19475 static bool
19476 rs6000_use_blocks_for_decl_p (const_tree decl)
19478 return !DECL_THREAD_LOCAL_P (decl);
/* Return a REG that occurs in ADDR with coefficient 1.
   ADDR can be effectively incremented by incrementing REG.

   r0 is special and we must not select it as an address
   register by this routine since our caller will try to
   increment the returned register via an "la" instruction.  */

rtx
find_addr_reg (rtx addr)
{
  /* Descend through nested PLUS terms, preferring a non-r0 register
     operand and discarding constant terms, until only a register
     remains.  */
  while (GET_CODE (addr) == PLUS)
    {
      if (REG_P (XEXP (addr, 0))
	  && REGNO (XEXP (addr, 0)) != 0)
	addr = XEXP (addr, 0);
      else if (REG_P (XEXP (addr, 1))
	       && REGNO (XEXP (addr, 1)) != 0)
	addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 0)))
	addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 1)))
	addr = XEXP (addr, 0);
      else
	gcc_unreachable ();
    }
  gcc_assert (REG_P (addr) && REGNO (addr) != 0);
  return addr;
}
/* Report an unrecognizable address OP via fatal_insn (internal
   compiler error).  */
void
rs6000_fatal_bad_address (rtx op)
{
  fatal_insn ("bad address", op);
}
19516 #if TARGET_MACHO
19518 vec<branch_island, va_gc> *branch_islands;
19520 /* Remember to generate a branch island for far calls to the given
19521 function. */
19523 static void
19524 add_compiler_branch_island (tree label_name, tree function_name,
19525 int line_number)
19527 branch_island bi = {function_name, label_name, line_number};
19528 vec_safe_push (branch_islands, bi);
19531 /* NO_PREVIOUS_DEF checks in the link list whether the function name is
19532 already there or not. */
19534 static int
19535 no_previous_def (tree function_name)
19537 branch_island *bi;
19538 unsigned ix;
19540 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19541 if (function_name == bi->function_name)
19542 return 0;
19543 return 1;
19546 /* GET_PREV_LABEL gets the label name from the previous definition of
19547 the function. */
19549 static tree
19550 get_prev_label (tree function_name)
19552 branch_island *bi;
19553 unsigned ix;
19555 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19556 if (function_name == bi->function_name)
19557 return bi->label_name;
19558 return NULL_TREE;
/* Generate external symbol indirection stubs (PIC and non-PIC).  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *symbol_name, *lazy_ptr_name;
  char *local_label_0;
  /* Counter making each PIC stub's PC-load label unique per file.  */
  static unsigned label = 0;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  lazy_ptr_name = XALLOCAVEC (char, length + 32);
  GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);

  if (MACHOPIC_PURE)
    {
      /* PIC stub: get the PC via the "bcl 20,31" idiom, then address
	 the lazy pointer PC-relatively and jump through it.  */
      switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
      fprintf (file, "\t.align 5\n");

      fprintf (file, "%s:\n", stub);
      fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

      label++;
      local_label_0 = XALLOCAVEC (char, 16);
      sprintf (local_label_0, "L%u$spb", label);

      fprintf (file, "\tmflr r0\n");
      fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
      fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
      fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
	       lazy_ptr_name, local_label_0);
      fprintf (file, "\tmtlr r0\n");
      fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
	       (TARGET_64BIT ? "ldu" : "lwzu"),
	       lazy_ptr_name, local_label_0);
      fprintf (file, "\tmtctr r12\n");
      fprintf (file, "\tbctr\n");
    }
  else /* mdynamic-no-pic or mkernel.  */
    {
      /* Non-PIC stub: address the lazy pointer absolutely.  */
      switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
      fprintf (file, "\t.align 4\n");

      fprintf (file, "%s:\n", stub);
      fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

      fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
      fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
	       (TARGET_64BIT ? "ldu" : "lwzu"),
	       lazy_ptr_name);
      fprintf (file, "\tmtctr r12\n");
      fprintf (file, "\tbctr\n");
    }

  /* The lazy pointer itself, initially pointing at dyld's binder.  */
  switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "%sdyld_stub_binding_helper\n",
	   (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
}
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go into a reg.  This is REG if non
   zero, otherwise we allocate register(s) as necessary.  */

/* True iff X fits a signed 16-bit displacement.  */
#define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)

rtx
rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
					rtx reg)
{
  rtx base, offset;

  if (reg == NULL && !reload_completed)
    reg = gen_reg_rtx (Pmode);

  if (GET_CODE (orig) == CONST)
    {
      rtx reg_temp;

      /* Already expressed relative to the PIC base: nothing to do.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      /* Use a different reg for the intermediate value, as
	 it will be marked UNCHANGING.  */
      reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
      base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
						     Pmode, reg_temp);
      offset =
	rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
						Pmode, reg);

      if (CONST_INT_P (offset))
	{
	  if (SMALL_INT (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	  else if (!reload_completed)
	    offset = force_reg (Pmode, offset);
	  else
	    {
	      /* After reload we cannot make new pseudos; fall back to
		 a constant-pool load of the whole address.  */
	      rtx mem = force_const_mem (Pmode, orig);
	      return machopic_legitimize_pic_address (mem, Pmode, reg);
	    }
	}
      return gen_rtx_PLUS (Pmode, base, offset);
    }

  /* Fall back on generic machopic code.  */
  return machopic_legitimize_pic_address (orig, mode, reg);
}
/* Output a .machine directive for the Darwin assembler, and call
   the generic start_file routine.  */

static void
rs6000_darwin_file_start (void)
{
  /* Map -mcpu= arguments (or ISA flag bits) to Darwin assembler
     .machine names; the NULL-arg entry is the catch-all default.  */
  static const struct
  {
    const char *arg;
    const char *name;
    HOST_WIDE_INT if_set;
  } mapping[] = {
    { "ppc64", "ppc64", MASK_64BIT },
    { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
    { "power4", "ppc970", 0 },
    { "G5", "ppc970", 0 },
    { "7450", "ppc7450", 0 },
    { "7400", "ppc7400", MASK_ALTIVEC },
    { "G4", "ppc7400", 0 },
    { "750", "ppc750", 0 },
    { "740", "ppc750", 0 },
    { "G3", "ppc750", 0 },
    { "604e", "ppc604e", 0 },
    { "604", "ppc604", 0 },
    { "603e", "ppc603", 0 },
    { "603", "ppc603", 0 },
    { "601", "ppc601", 0 },
    { NULL, "ppc", 0 } };
  const char *cpu_id = "";
  size_t i;

  rs6000_file_start ();
  darwin_file_start ();

  /* Determine the argument to -mcpu=.  Default to G3 if not specified.  */

  if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
    cpu_id = rs6000_default_cpu;

  /* An explicit -mcpu= on the command line overrides the default.  */
  if (global_options_set.x_rs6000_cpu_index)
    cpu_id = processor_target_table[rs6000_cpu_index].name;

  /* Look through the mapping array.  Pick the first name that either
     matches the argument, has a bit set in IF_SET that is also set
     in the target flags, or has a NULL name.  */

  i = 0;
  while (mapping[i].arg != NULL
	 && strcmp (mapping[i].arg, cpu_id) != 0
	 && (mapping[i].if_set & rs6000_isa_flags) == 0)
    i++;

  fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
}
19737 #endif /* TARGET_MACHO */
19739 #if TARGET_ELF
19740 static int
19741 rs6000_elf_reloc_rw_mask (void)
19743 if (flag_pic)
19744 return 3;
19745 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19746 return 2;
19747 else
19748 return 0;
19751 /* Record an element in the table of global constructors. SYMBOL is
19752 a SYMBOL_REF of the function to be called; PRIORITY is a number
19753 between 0 and MAX_INIT_PRIORITY.
19755 This differs from default_named_section_asm_out_constructor in
19756 that we have special handling for -mrelocatable. */
19758 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
19759 static void
19760 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
19762 const char *section = ".ctors";
19763 char buf[18];
19765 if (priority != DEFAULT_INIT_PRIORITY)
19767 sprintf (buf, ".ctors.%.5u",
19768 /* Invert the numbering so the linker puts us in the proper
19769 order; constructors are run from right to left, and the
19770 linker sorts in increasing order. */
19771 MAX_INIT_PRIORITY - priority);
19772 section = buf;
19775 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19776 assemble_align (POINTER_SIZE);
19778 if (DEFAULT_ABI == ABI_V4
19779 && (TARGET_RELOCATABLE || flag_pic > 1))
19781 fputs ("\t.long (", asm_out_file);
19782 output_addr_const (asm_out_file, symbol);
19783 fputs (")@fixup\n", asm_out_file);
19785 else
19786 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
19789 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
19790 static void
19791 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
19793 const char *section = ".dtors";
19794 char buf[18];
19796 if (priority != DEFAULT_INIT_PRIORITY)
19798 sprintf (buf, ".dtors.%.5u",
19799 /* Invert the numbering so the linker puts us in the proper
19800 order; constructors are run from right to left, and the
19801 linker sorts in increasing order. */
19802 MAX_INIT_PRIORITY - priority);
19803 section = buf;
19806 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19807 assemble_align (POINTER_SIZE);
19809 if (DEFAULT_ABI == ABI_V4
19810 && (TARGET_RELOCATABLE || flag_pic > 1))
19812 fputs ("\t.long (", asm_out_file);
19813 output_addr_const (asm_out_file, symbol);
19814 fputs (")@fixup\n", asm_out_file);
19816 else
19817 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
/* Emit the assembler preamble that declares function NAME (for DECL)
   to FILE: the .opd descriptor on 64-bit, V4 -mrelocatable TOC
   offsets, large-model TOC anchors, or an AIX-style descriptor, then
   the entry label itself.  */
void
rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
{
  if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
    {
      /* ELFv1: NAME labels the three-word .opd descriptor; the code
	 entry is the dot-symbol emitted below.  */
      fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
      ASM_OUTPUT_LABEL (file, name);
      fputs (DOUBLE_INT_ASM_OP, file);
      rs6000_output_function_entry (file, name);
      fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
      if (DOT_SYMBOLS)
	{
	  fputs ("\t.size\t", file);
	  assemble_name (file, name);
	  fputs (",24\n\t.type\t.", file);
	  assemble_name (file, name);
	  fputs (",@function\n", file);
	  if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
	    {
	      fputs ("\t.globl\t.", file);
	      assemble_name (file, name);
	      putc ('\n', file);
	    }
	}
      else
	ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
      ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
      rs6000_output_function_entry (file, name);
      fputs (":\n", file);
      return;
    }

  /* V4 -mrelocatable / big PIC without secure PLT: emit the
     TOC-base offset word used by the prologue's PC-load sequence.
     uses_TOC () == 2 means it belongs in the other text partition.  */
  int uses_toc;
  if (DEFAULT_ABI == ABI_V4
      && (TARGET_RELOCATABLE || flag_pic > 1)
      && !TARGET_SECURE_PLT
      && (!constant_pool_empty_p () || crtl->profile)
      && (uses_toc = uses_TOC ()))
    {
      char buf[256];

      if (uses_toc == 2)
	switch_to_other_text_partition ();
      (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);

      fprintf (file, "\t.long ");
      assemble_name (file, toc_label_name);
      need_toc_init = 1;
      putc ('-', file);
      ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
      assemble_name (file, buf);
      putc ('\n', file);
      if (uses_toc == 2)
	switch_to_other_text_partition ();
    }

  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));

  /* Large code model: anchor word with the distance to .TOC. for the
     global entry point prologue.  */
  if (TARGET_CMODEL == CMODEL_LARGE
      && rs6000_global_entry_point_prologue_needed_p ())
    {
      char buf[256];

      (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);

      fprintf (file, "\t.quad .TOC.-");
      ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
      assemble_name (file, buf);
      putc ('\n', file);
    }

  /* Emulated AIX ABI on 32-bit ELF: emit a function descriptor.  */
  if (DEFAULT_ABI == ABI_AIX)
    {
      const char *desc_name, *orig_name;

      orig_name = (*targetm.strip_name_encoding) (name);
      desc_name = orig_name;
      while (*desc_name == '.')
	desc_name++;

      if (TREE_PUBLIC (decl))
	fprintf (file, "\t.globl %s\n", desc_name);

      fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
      fprintf (file, "%s:\n", desc_name);
      fprintf (file, "\t.long %s\n", orig_name);
      fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
      fputs ("\t.long 0\n", file);
      fprintf (file, "\t.previous\n");
    }
  ASM_OUTPUT_LABEL (file, name);
}
static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
/* File-end hook: emit .gnu_attribute ABI tags, the exec-stack /
   split-stack markers, and the CPU-builtin TCB verification symbol.  */
static void
rs6000_elf_file_end (void)
{
#ifdef HAVE_AS_GNU_ATTRIBUTE
  /* ??? The value emitted depends on options active at file end.
     Assume anyone using #pragma or attributes that might change
     options knows what they are doing.  */
  if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
      && rs6000_passes_float)
    {
      /* Tag 4: FP ABI.  Low bits: 1 = hard float, 2 = soft float;
	 bits 2-3 encode the long double format when it was passed.  */
      int fp;

      if (TARGET_HARD_FLOAT)
	fp = 1;
      else
	fp = 2;
      if (rs6000_passes_long_double)
	{
	  if (!TARGET_LONG_DOUBLE_128)
	    fp |= 2 * 4;
	  else if (TARGET_IEEEQUAD)
	    fp |= 3 * 4;
	  else
	    fp |= 1 * 4;
	}
      fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
    }
  if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
    {
      /* Tag 8: vector ABI; tag 12: struct-return convention.  */
      if (rs6000_passes_vector)
	fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
		 (TARGET_ALTIVEC_ABI ? 2 : 1));
      if (rs6000_returns_struct)
	fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
		 aix_struct_return ? 2 : 1);
    }
#endif
#if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
  if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
    file_end_indicate_exec_stack ();
#endif

  if (flag_split_stack)
    file_end_indicate_split_stack ();

  if (cpu_builtin_p)
    {
      /* We have expanded a CPU builtin, so we need to emit a reference to
	 the special symbol that LIBC uses to declare it supports the
	 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature.  */
      switch_to_section (data_section);
      fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
      fprintf (asm_out_file, "\t%s %s\n",
	       TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
    }
}
19971 #endif
19973 #if TARGET_XCOFF
19975 #ifndef HAVE_XCOFF_DWARF_EXTRAS
19976 #define HAVE_XCOFF_DWARF_EXTRAS 0
19977 #endif
/* Implement TARGET_DEBUG_UNWIND_INFO: XCOFF generates no debug-only
   unwind tables.  */
static enum unwind_info_type
rs6000_xcoff_debug_unwind_info (void)
{
  return UI_NONE;
}
/* Implement TARGET_ASM_OUTPUT_ANCHOR for XCOFF: define SYMBOL via a
   .set expression as "$ + offset" (the assembler's current location
   plus the symbol's offset within its object block).  */
static void
rs6000_xcoff_asm_output_anchor (rtx symbol)
{
  char buffer[100];

  sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
	   SYMBOL_REF_BLOCK_OFFSET (symbol));
  fprintf (asm_out_file, "%s", SET_ASM_OP);
  RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
  fprintf (asm_out_file, ",");
  RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
  fprintf (asm_out_file, "\n");
}
/* Implement TARGET_ASM_GLOBALIZE_LABEL: emit ".globl NAME" (with the
   XCOFF basename transformation applied to NAME).  */
static void
rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
{
  fputs (GLOBAL_ASM_OP, stream);
  RS6000_OUTPUT_BASENAME (stream, name);
  putc ('\n', stream);
}
/* A get_unnamed_decl callback, used for read-only sections.  PTR
   points to the section string variable.  */

static void
rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
{
  /* DIRECTIVE is a pointer to the section-name string variable;
     emit a .csect with the RO storage-mapping class.  */
  fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
	   *(const char *const *) directive,
	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
}
/* Likewise for read-write sections.  */

static void
rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
{
  /* RW storage-mapping class: initialized writable data.  */
  fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
	   *(const char *const *) directive,
	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
}
/* Likewise for thread-local sections: TL storage-mapping class.  */
static void
rs6000_xcoff_output_tls_section_asm_op (const void *directive)
{
  fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
	   *(const char *const *) directive,
	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
}
/* A get_unnamed_section callback, used for switching to toc_section.  */

static void
rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  if (TARGET_MINIMAL_TOC)
    {
      /* toc_section is always selected at least once from
	 rs6000_xcoff_file_start, so this is guaranteed to
	 always be defined once and only once in each file.  */
      if (!toc_initialized)
	{
	  fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
	  fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
	  toc_initialized = 1;
	}
      /* 64-bit gets 8-byte (2^3) csect alignment.  */
      fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
	       (TARGET_32BIT ? "" : ",3"));
    }
  else
    fputs ("\t.toc\n", asm_out_file);
}
20059 /* Implement TARGET_ASM_INIT_SECTIONS. */
20061 static void
20062 rs6000_xcoff_asm_init_sections (void)
20064 read_only_data_section
20065 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20066 &xcoff_read_only_section_name);
20068 private_data_section
20069 = get_unnamed_section (SECTION_WRITE,
20070 rs6000_xcoff_output_readwrite_section_asm_op,
20071 &xcoff_private_data_section_name);
20073 read_only_private_data_section
20074 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20075 &xcoff_private_rodata_section_name);
20077 tls_data_section
20078 = get_unnamed_section (SECTION_TLS,
20079 rs6000_xcoff_output_tls_section_asm_op,
20080 &xcoff_tls_data_section_name);
20082 tls_private_data_section
20083 = get_unnamed_section (SECTION_TLS,
20084 rs6000_xcoff_output_tls_section_asm_op,
20085 &xcoff_private_data_section_name);
20087 toc_section
20088 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
20090 readonly_data_section = read_only_data_section;
/* AIX is always PIC: runtime relocations are acceptable in both
   read-only and writable sections.  */
static int
rs6000_xcoff_reloc_rw_mask (void)
{
  return 3;
}
/* Implement TARGET_ASM_NAMED_SECTION: emit the .csect (or .dwsect)
   directive for a named section, choosing the XCOFF storage-mapping
   class from FLAGS.  The order of the flag tests establishes the
   priority: EXCLUDE > DEBUG > CODE > TLS > WRITE > read-only.  */
static void
rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
				tree decl ATTRIBUTE_UNUSED)
{
  int smclass;
  /* Indexed by SMCLASS below: code, read-only, read-write, TLS,
     excluded-from-load.  */
  static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };

  if (flags & SECTION_EXCLUDE)
    smclass = 4;
  else if (flags & SECTION_DEBUG)
    {
      /* Debug sections use the dedicated .dwsect directive.  */
      fprintf (asm_out_file, "\t.dwsect %s\n", name);
      return;
    }
  else if (flags & SECTION_CODE)
    smclass = 0;
  else if (flags & SECTION_TLS)
    smclass = 3;
  else if (flags & SECTION_WRITE)
    smclass = 2;
  else
    smclass = 1;

  /* Code csects get a leading '.'.  */
  fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
	   (flags & SECTION_CODE) ? "." : "",
	   name, suffix[smclass], flags & SECTION_ENTSIZE);
}
/* True if DECL is a function or variable with an explicit section.  */
#define IN_NAMED_SECTION(DECL) \
  ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
   && DECL_SECTION_NAME (DECL) != NULL)

/* Implement TARGET_ASM_SELECT_SECTION for XCOFF: route DECL to the
   read-only / read-write / TLS data section matching its visibility
   and initializer.  */
static section *
rs6000_xcoff_select_section (tree decl, int reloc,
			     unsigned HOST_WIDE_INT align)
{
  /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
     named section.  */
  if (align > BIGGEST_ALIGNMENT)
    {
      resolve_unique_section (decl, reloc, true);
      if (IN_NAMED_SECTION (decl))
	return get_named_section (decl, NULL, reloc);
    }

  if (decl_readonly_section (decl, reloc))
    {
      if (TREE_PUBLIC (decl))
	return read_only_data_section;
      else
	return read_only_private_data_section;
    }
  else
    {
#if HAVE_AS_TLS
      if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
	{
	  if (TREE_PUBLIC (decl))
	    return tls_data_section;
	  else if (bss_initializer_p (decl))
	    {
	      /* Convert to COMMON to emit in BSS.  */
	      DECL_COMMON (decl) = 1;
	      return tls_comm_section;
	    }
	  else
	    return tls_private_data_section;
	}
      else
#endif
	if (TREE_PUBLIC (decl))
	  return data_section;
	else
	  return private_data_section;
    }
}
/* Implement TARGET_ASM_UNIQUE_SECTION for XCOFF: give DECL its own
   section (named after its assembler name) only when select_section
   cannot place it; otherwise do nothing.  */
static void
rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
{
  const char *name;

  /* Use select_section for private data and uninitialized data with
     alignment <= BIGGEST_ALIGNMENT.  */
  if (!TREE_PUBLIC (decl)
      || DECL_COMMON (decl)
      || (DECL_INITIAL (decl) == NULL_TREE
	  && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
      || DECL_INITIAL (decl) == error_mark_node
      || (flag_zero_initialized_in_bss
	  && initializer_zerop (DECL_INITIAL (decl))))
    return;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = (*targetm.strip_name_encoding) (name);
  set_decl_section_name (decl, name);
}
20197 /* Select section for constant in constant pool.
20199 On RS/6000, all constants are in the private read-only data area.
20200 However, if this is being placed in the TOC it must be output as a
20201 toc entry. */
20203 static section *
20204 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
20205 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
20207 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20208 return toc_section;
20209 else
20210 return read_only_private_data_section;
/* Remove any trailing [DS] or the like from the symbol name.
   Returns NAME itself (past any leading '*') when there is no
   mapping-class suffix; otherwise a fresh GC-allocated copy with the
   four-character "[XX]" suffix removed.  */

static const char *
rs6000_xcoff_strip_name_encoding (const char *name)
{
  size_t len;

  if (*name == '*')
    name++;
  len = strlen (name);
  /* Only strip when the name is long enough to actually hold a
     "[XX]" suffix; this also keeps LEN - 4 from wrapping around and
     avoids reading name[-1] for an empty string.  */
  if (len >= 4 && name[len - 1] == ']')
    return ggc_alloc_string (name, len - 4);
  else
    return name;
}
/* Section attributes.  AIX is always PIC.  */

static unsigned int
rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int align;
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  /* Align to at least UNIT size.  */
  if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
    align = MIN_UNITS_PER_WORD;
  else
    /* Increase alignment of large objects if not already stricter.  */
    align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
		 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
		 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);

  /* Smuggle log2 of the alignment through the SECTION_ENTSIZE bits.  */
  return flags | (exact_log2 (align) & SECTION_ENTSIZE);
}
/* Output at beginning of assembler file.

   Initialize the section names for the RS/6000 at this point.

   Specify filename, including full path, to assembler.

   We want to go into the TOC section so at least one .toc will be emitted.
   Also, in order to output proper .bs/.es pairs, we need at least one static
   [RW] section emitted.

   Finally, declare mcount when profiling to make the assembler happy.  */

static void
rs6000_xcoff_file_start (void)
{
  /* Derive per-file section names from the input file name.  */
  rs6000_gen_section_name (&xcoff_bss_section_name,
			   main_input_filename, ".bss_");
  rs6000_gen_section_name (&xcoff_private_data_section_name,
			   main_input_filename, ".rw_");
  rs6000_gen_section_name (&xcoff_private_rodata_section_name,
			   main_input_filename, ".rop_");
  rs6000_gen_section_name (&xcoff_read_only_section_name,
			   main_input_filename, ".ro_");
  rs6000_gen_section_name (&xcoff_tls_data_section_name,
			   main_input_filename, ".tls_");
  rs6000_gen_section_name (&xcoff_tbss_section_name,
			   main_input_filename, ".tbss_[UL]");

  fputs ("\t.file\t", asm_out_file);
  output_quoted_string (asm_out_file, main_input_filename);
  fputc ('\n', asm_out_file);
  if (write_symbols != NO_DEBUG)
    switch_to_section (private_data_section);
  switch_to_section (toc_section);
  switch_to_section (text_section);
  if (profile_flag)
    fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
  rs6000_file_start ();
}
/* Output at end of assembler file.
   On the RS/6000, referencing data should automatically pull in text.  */

static void
rs6000_xcoff_file_end (void)
{
  switch_to_section (text_section);
  /* Define _section_.text in text, then reference it from data so the
     linker drags the text csect in whenever data is referenced.  */
  fputs ("_section_.text:\n", asm_out_file);
  switch_to_section (data_section);
  fputs (TARGET_32BIT
	 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
	 asm_out_file);
}
/* State passed to rs6000_declare_alias while walking the aliases of a
   symbol being output.  */
struct declare_alias_data
{
  /* Assembler output stream.  */
  FILE *file;
  /* True while emitting the ".name" code-entry labels rather than the
     descriptor labels.  */
  bool function_descriptor;
};
/* Declare alias N.  A helper function for for_node_and_aliases.
   D points at a struct declare_alias_data.  Always returns false so
   the walk continues over all aliases.  */

static bool
rs6000_declare_alias (struct symtab_node *n, void *d)
{
  struct declare_alias_data *data = (struct declare_alias_data *)d;
  /* Main symbol is output specially, because varasm machinery does part of
     the job for us - we do not need to declare .globl/lglobs and such.  */
  if (!n->alias || n->weakref)
    return false;

  if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
    return false;

  /* Prevent assemble_alias from trying to use .set pseudo operation
     that does not behave as expected by the middle-end.  */
  TREE_ASM_WRITTEN (n->decl) = true;

  const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
  char *buffer = (char *) alloca (strlen (name) + 2);
  char *p;
  int dollar_inside = 0;

  /* The AIX assembler rejects '$' in identifiers: map each one to '_'
     and emit .rename directives below mapping back to the real name.  */
  strcpy (buffer, name);
  p = strchr (buffer, '$');
  while (p) {
    *p = '_';
    dollar_inside++;
    p = strchr (p + 1, '$');
  }
  if (TREE_PUBLIC (n->decl))
    {
      if (!RS6000_WEAK || !DECL_WEAK (n->decl))
	{
	  if (dollar_inside) {
	      if (data->function_descriptor)
		fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
	      fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
	    }
	  /* Function aliases also need the dot-prefixed code entry.  */
	  if (data->function_descriptor)
	    {
	      fputs ("\t.globl .", data->file);
	      RS6000_OUTPUT_BASENAME (data->file, buffer);
	      putc ('\n', data->file);
	    }
	  fputs ("\t.globl ", data->file);
	  RS6000_OUTPUT_BASENAME (data->file, buffer);
	  putc ('\n', data->file);
	}
#ifdef ASM_WEAKEN_DECL
      else if (DECL_WEAK (n->decl) && !data->function_descriptor)
	ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
#endif
    }
  else
    {
      /* Local alias: .lglobl instead of .globl.  */
      if (dollar_inside)
	{
	  if (data->function_descriptor)
	    fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
	  fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
	}
      if (data->function_descriptor)
	{
	  fputs ("\t.lglobl .", data->file);
	  RS6000_OUTPUT_BASENAME (data->file, buffer);
	  putc ('\n', data->file);
	}
      fputs ("\t.lglobl ", data->file);
      RS6000_OUTPUT_BASENAME (data->file, buffer);
      putc ('\n', data->file);
    }
  /* Finally emit the alias label itself at the current position.  */
  if (data->function_descriptor)
    fputs (".", data->file);
  RS6000_OUTPUT_BASENAME (data->file, buffer);
  fputs (":\n", data->file);
  return false;
}
#ifdef HAVE_GAS_HIDDEN
/* Helper function to calculate visibility of a DECL
   and return the value as a const string (either empty or a
   ",visibility" suffix for assembler directives).  */

static const char *
rs6000_xcoff_visibility (tree decl)
{
  switch (DECL_VISIBILITY (decl))
    {
    case VISIBILITY_PROTECTED:
      return ",protected";
    case VISIBILITY_HIDDEN:
      return ",hidden";
    case VISIBILITY_INTERNAL:
      return ",internal";
    default:
      return "";
    }
}
#endif
20405 /* This macro produces the initial definition of a function name.
20406 On the RS/6000, we need to place an extra '.' in the function name and
20407 output the function descriptor.
20408 Dollar signs are converted to underscores.
20410 The csect for the function will have already been created when
20411 text_section was selected. We do have to go back to that csect, however.
20413 The third and fourth parameters to the .function pseudo-op (16 and 044)
20414 are placeholders which no longer have any use.
20416 Because AIX assembler's .set command has unexpected semantics, we output
20417 all aliases as alternative labels in front of the definition. */
20419 void
20420 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
20422 char *buffer = (char *) alloca (strlen (name) + 1);
20423 char *p;
20424 int dollar_inside = 0;
20425 struct declare_alias_data data = {file, false};
20427 strcpy (buffer, name);
20428 p = strchr (buffer, '$');
20429 while (p) {
20430 *p = '_';
20431 dollar_inside++;
20432 p = strchr (p + 1, '$');
20434 if (TREE_PUBLIC (decl))
20436 if (!RS6000_WEAK || !DECL_WEAK (decl))
20438 if (dollar_inside) {
20439 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20440 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20442 fputs ("\t.globl .", file);
20443 RS6000_OUTPUT_BASENAME (file, buffer);
20444 #ifdef HAVE_GAS_HIDDEN
20445 fputs (rs6000_xcoff_visibility (decl), file);
20446 #endif
20447 putc ('\n', file);
20450 else
20452 if (dollar_inside) {
20453 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20454 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20456 fputs ("\t.lglobl .", file);
20457 RS6000_OUTPUT_BASENAME (file, buffer);
20458 putc ('\n', file);
20460 fputs ("\t.csect ", file);
20461 RS6000_OUTPUT_BASENAME (file, buffer);
20462 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
20463 RS6000_OUTPUT_BASENAME (file, buffer);
20464 fputs (":\n", file);
20465 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20466 &data, true);
20467 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
20468 RS6000_OUTPUT_BASENAME (file, buffer);
20469 fputs (", TOC[tc0], 0\n", file);
20470 in_section = NULL;
20471 switch_to_section (function_section (decl));
20472 putc ('.', file);
20473 RS6000_OUTPUT_BASENAME (file, buffer);
20474 fputs (":\n", file);
20475 data.function_descriptor = true;
20476 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20477 &data, true);
20478 if (!DECL_IGNORED_P (decl))
20480 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
20481 xcoffout_declare_function (file, decl, buffer);
20482 else if (write_symbols == DWARF2_DEBUG)
20484 name = (*targetm.strip_name_encoding) (name);
20485 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
20488 return;
20492 /* Output assembly language to globalize a symbol from a DECL,
20493 possibly with visibility. */
20495 void
20496 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
20498 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
20499 fputs (GLOBAL_ASM_OP, stream);
20500 RS6000_OUTPUT_BASENAME (stream, name);
20501 #ifdef HAVE_GAS_HIDDEN
20502 fputs (rs6000_xcoff_visibility (decl), stream);
20503 #endif
20504 putc ('\n', stream);
20507 /* Output assembly language to define a symbol as COMMON from a DECL,
20508 possibly with visibility. */
20510 void
20511 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
20512 tree decl ATTRIBUTE_UNUSED,
20513 const char *name,
20514 unsigned HOST_WIDE_INT size,
20515 unsigned HOST_WIDE_INT align)
20517 unsigned HOST_WIDE_INT align2 = 2;
20519 if (align > 32)
20520 align2 = floor_log2 (align / BITS_PER_UNIT);
20521 else if (size > 4)
20522 align2 = 3;
20524 fputs (COMMON_ASM_OP, stream);
20525 RS6000_OUTPUT_BASENAME (stream, name);
20527 fprintf (stream,
20528 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
20529 size, align2);
20531 #ifdef HAVE_GAS_HIDDEN
20532 if (decl != NULL)
20533 fputs (rs6000_xcoff_visibility (decl), stream);
20534 #endif
20535 putc ('\n', stream);
20538 /* This macro produces the initial definition of a object (variable) name.
20539 Because AIX assembler's .set command has unexpected semantics, we output
20540 all aliases as alternative labels in front of the definition. */
20542 void
20543 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
20545 struct declare_alias_data data = {file, false};
20546 RS6000_OUTPUT_BASENAME (file, name);
20547 fputs (":\n", file);
20548 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20549 &data, true);
20552 /* Overide the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
20554 void
20555 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
20557 fputs (integer_asm_op (size, FALSE), file);
20558 assemble_name (file, label);
20559 fputs ("-$", file);
20562 /* Output a symbol offset relative to the dbase for the current object.
20563 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
20564 signed offsets.
20566 __gcc_unwind_dbase is embedded in all executables/libraries through
20567 libgcc/config/rs6000/crtdbase.S. */
20569 void
20570 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
20572 fputs (integer_asm_op (size, FALSE), file);
20573 assemble_name (file, label);
20574 fputs("-__gcc_unwind_dbase", file);
20577 #ifdef HAVE_AS_TLS
20578 static void
20579 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
20581 rtx symbol;
20582 int flags;
20583 const char *symname;
20585 default_encode_section_info (decl, rtl, first);
20587 /* Careful not to prod global register variables. */
20588 if (!MEM_P (rtl))
20589 return;
20590 symbol = XEXP (rtl, 0);
20591 if (!SYMBOL_REF_P (symbol))
20592 return;
20594 flags = SYMBOL_REF_FLAGS (symbol);
20596 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20597 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
20599 SYMBOL_REF_FLAGS (symbol) = flags;
20601 /* Append mapping class to extern decls. */
20602 symname = XSTR (symbol, 0);
20603 if (decl /* sync condition with assemble_external () */
20604 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
20605 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
20606 || TREE_CODE (decl) == FUNCTION_DECL)
20607 && symname[strlen (symname) - 1] != ']')
20609 char *newname = (char *) alloca (strlen (symname) + 5);
20610 strcpy (newname, symname);
20611 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
20612 ? "[DS]" : "[UA]"));
20613 XSTR (symbol, 0) = ggc_strdup (newname);
20616 #endif /* HAVE_AS_TLS */
20617 #endif /* TARGET_XCOFF */
20619 void
20620 rs6000_asm_weaken_decl (FILE *stream, tree decl,
20621 const char *name, const char *val)
20623 fputs ("\t.weak\t", stream);
20624 RS6000_OUTPUT_BASENAME (stream, name);
20625 if (decl && TREE_CODE (decl) == FUNCTION_DECL
20626 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
20628 if (TARGET_XCOFF)
20629 fputs ("[DS]", stream);
20630 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20631 if (TARGET_XCOFF)
20632 fputs (rs6000_xcoff_visibility (decl), stream);
20633 #endif
20634 fputs ("\n\t.weak\t.", stream);
20635 RS6000_OUTPUT_BASENAME (stream, name);
20637 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20638 if (TARGET_XCOFF)
20639 fputs (rs6000_xcoff_visibility (decl), stream);
20640 #endif
20641 fputc ('\n', stream);
20642 if (val)
20644 #ifdef ASM_OUTPUT_DEF
20645 ASM_OUTPUT_DEF (stream, name, val);
20646 #endif
20647 if (decl && TREE_CODE (decl) == FUNCTION_DECL
20648 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
20650 fputs ("\t.set\t.", stream);
20651 RS6000_OUTPUT_BASENAME (stream, name);
20652 fputs (",.", stream);
20653 RS6000_OUTPUT_BASENAME (stream, val);
20654 fputc ('\n', stream);
20660 /* Return true if INSN should not be copied. */
20662 static bool
20663 rs6000_cannot_copy_insn_p (rtx_insn *insn)
20665 return recog_memoized (insn) >= 0
20666 && get_attr_cannot_copy (insn);
20669 /* Compute a (partial) cost for rtx X. Return true if the complete
20670 cost has been computed, and false if subexpressions should be
20671 scanned. In either case, *TOTAL contains the cost result. */
20673 static bool
20674 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
20675 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
20677 int code = GET_CODE (x);
20679 switch (code)
20681 /* On the RS/6000, if it is valid in the insn, it is free. */
20682 case CONST_INT:
20683 if (((outer_code == SET
20684 || outer_code == PLUS
20685 || outer_code == MINUS)
20686 && (satisfies_constraint_I (x)
20687 || satisfies_constraint_L (x)))
20688 || (outer_code == AND
20689 && (satisfies_constraint_K (x)
20690 || (mode == SImode
20691 ? satisfies_constraint_L (x)
20692 : satisfies_constraint_J (x))))
20693 || ((outer_code == IOR || outer_code == XOR)
20694 && (satisfies_constraint_K (x)
20695 || (mode == SImode
20696 ? satisfies_constraint_L (x)
20697 : satisfies_constraint_J (x))))
20698 || outer_code == ASHIFT
20699 || outer_code == ASHIFTRT
20700 || outer_code == LSHIFTRT
20701 || outer_code == ROTATE
20702 || outer_code == ROTATERT
20703 || outer_code == ZERO_EXTRACT
20704 || (outer_code == MULT
20705 && satisfies_constraint_I (x))
20706 || ((outer_code == DIV || outer_code == UDIV
20707 || outer_code == MOD || outer_code == UMOD)
20708 && exact_log2 (INTVAL (x)) >= 0)
20709 || (outer_code == COMPARE
20710 && (satisfies_constraint_I (x)
20711 || satisfies_constraint_K (x)))
20712 || ((outer_code == EQ || outer_code == NE)
20713 && (satisfies_constraint_I (x)
20714 || satisfies_constraint_K (x)
20715 || (mode == SImode
20716 ? satisfies_constraint_L (x)
20717 : satisfies_constraint_J (x))))
20718 || (outer_code == GTU
20719 && satisfies_constraint_I (x))
20720 || (outer_code == LTU
20721 && satisfies_constraint_P (x)))
20723 *total = 0;
20724 return true;
20726 else if ((outer_code == PLUS
20727 && reg_or_add_cint_operand (x, VOIDmode))
20728 || (outer_code == MINUS
20729 && reg_or_sub_cint_operand (x, VOIDmode))
20730 || ((outer_code == SET
20731 || outer_code == IOR
20732 || outer_code == XOR)
20733 && (INTVAL (x)
20734 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
20736 *total = COSTS_N_INSNS (1);
20737 return true;
20739 /* FALLTHRU */
20741 case CONST_DOUBLE:
20742 case CONST_WIDE_INT:
20743 case CONST:
20744 case HIGH:
20745 case SYMBOL_REF:
20746 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20747 return true;
20749 case MEM:
20750 /* When optimizing for size, MEM should be slightly more expensive
20751 than generating address, e.g., (plus (reg) (const)).
20752 L1 cache latency is about two instructions. */
20753 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20754 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
20755 *total += COSTS_N_INSNS (100);
20756 return true;
20758 case LABEL_REF:
20759 *total = 0;
20760 return true;
20762 case PLUS:
20763 case MINUS:
20764 if (FLOAT_MODE_P (mode))
20765 *total = rs6000_cost->fp;
20766 else
20767 *total = COSTS_N_INSNS (1);
20768 return false;
20770 case MULT:
20771 if (CONST_INT_P (XEXP (x, 1))
20772 && satisfies_constraint_I (XEXP (x, 1)))
20774 if (INTVAL (XEXP (x, 1)) >= -256
20775 && INTVAL (XEXP (x, 1)) <= 255)
20776 *total = rs6000_cost->mulsi_const9;
20777 else
20778 *total = rs6000_cost->mulsi_const;
20780 else if (mode == SFmode)
20781 *total = rs6000_cost->fp;
20782 else if (FLOAT_MODE_P (mode))
20783 *total = rs6000_cost->dmul;
20784 else if (mode == DImode)
20785 *total = rs6000_cost->muldi;
20786 else
20787 *total = rs6000_cost->mulsi;
20788 return false;
20790 case FMA:
20791 if (mode == SFmode)
20792 *total = rs6000_cost->fp;
20793 else
20794 *total = rs6000_cost->dmul;
20795 break;
20797 case DIV:
20798 case MOD:
20799 if (FLOAT_MODE_P (mode))
20801 *total = mode == DFmode ? rs6000_cost->ddiv
20802 : rs6000_cost->sdiv;
20803 return false;
20805 /* FALLTHRU */
20807 case UDIV:
20808 case UMOD:
20809 if (CONST_INT_P (XEXP (x, 1))
20810 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
20812 if (code == DIV || code == MOD)
20813 /* Shift, addze */
20814 *total = COSTS_N_INSNS (2);
20815 else
20816 /* Shift */
20817 *total = COSTS_N_INSNS (1);
20819 else
20821 if (GET_MODE (XEXP (x, 1)) == DImode)
20822 *total = rs6000_cost->divdi;
20823 else
20824 *total = rs6000_cost->divsi;
20826 /* Add in shift and subtract for MOD unless we have a mod instruction. */
20827 if (!TARGET_MODULO && (code == MOD || code == UMOD))
20828 *total += COSTS_N_INSNS (2);
20829 return false;
20831 case CTZ:
20832 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
20833 return false;
20835 case FFS:
20836 *total = COSTS_N_INSNS (4);
20837 return false;
20839 case POPCOUNT:
20840 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
20841 return false;
20843 case PARITY:
20844 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
20845 return false;
20847 case NOT:
20848 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
20849 *total = 0;
20850 else
20851 *total = COSTS_N_INSNS (1);
20852 return false;
20854 case AND:
20855 if (CONST_INT_P (XEXP (x, 1)))
20857 rtx left = XEXP (x, 0);
20858 rtx_code left_code = GET_CODE (left);
20860 /* rotate-and-mask: 1 insn. */
20861 if ((left_code == ROTATE
20862 || left_code == ASHIFT
20863 || left_code == LSHIFTRT)
20864 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
20866 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
20867 if (!CONST_INT_P (XEXP (left, 1)))
20868 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
20869 *total += COSTS_N_INSNS (1);
20870 return true;
20873 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
20874 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
20875 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
20876 || (val & 0xffff) == val
20877 || (val & 0xffff0000) == val
20878 || ((val & 0xffff) == 0 && mode == SImode))
20880 *total = rtx_cost (left, mode, AND, 0, speed);
20881 *total += COSTS_N_INSNS (1);
20882 return true;
20885 /* 2 insns. */
20886 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
20888 *total = rtx_cost (left, mode, AND, 0, speed);
20889 *total += COSTS_N_INSNS (2);
20890 return true;
20894 *total = COSTS_N_INSNS (1);
20895 return false;
20897 case IOR:
20898 /* FIXME */
20899 *total = COSTS_N_INSNS (1);
20900 return true;
20902 case CLZ:
20903 case XOR:
20904 case ZERO_EXTRACT:
20905 *total = COSTS_N_INSNS (1);
20906 return false;
20908 case ASHIFT:
20909 /* The EXTSWSLI instruction is a combined instruction. Don't count both
20910 the sign extend and shift separately within the insn. */
20911 if (TARGET_EXTSWSLI && mode == DImode
20912 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
20913 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
20915 *total = 0;
20916 return false;
20918 /* fall through */
20920 case ASHIFTRT:
20921 case LSHIFTRT:
20922 case ROTATE:
20923 case ROTATERT:
20924 /* Handle mul_highpart. */
20925 if (outer_code == TRUNCATE
20926 && GET_CODE (XEXP (x, 0)) == MULT)
20928 if (mode == DImode)
20929 *total = rs6000_cost->muldi;
20930 else
20931 *total = rs6000_cost->mulsi;
20932 return true;
20934 else if (outer_code == AND)
20935 *total = 0;
20936 else
20937 *total = COSTS_N_INSNS (1);
20938 return false;
20940 case SIGN_EXTEND:
20941 case ZERO_EXTEND:
20942 if (MEM_P (XEXP (x, 0)))
20943 *total = 0;
20944 else
20945 *total = COSTS_N_INSNS (1);
20946 return false;
20948 case COMPARE:
20949 case NEG:
20950 case ABS:
20951 if (!FLOAT_MODE_P (mode))
20953 *total = COSTS_N_INSNS (1);
20954 return false;
20956 /* FALLTHRU */
20958 case FLOAT:
20959 case UNSIGNED_FLOAT:
20960 case FIX:
20961 case UNSIGNED_FIX:
20962 case FLOAT_TRUNCATE:
20963 *total = rs6000_cost->fp;
20964 return false;
20966 case FLOAT_EXTEND:
20967 if (mode == DFmode)
20968 *total = rs6000_cost->sfdf_convert;
20969 else
20970 *total = rs6000_cost->fp;
20971 return false;
20973 case CALL:
20974 case IF_THEN_ELSE:
20975 if (!speed)
20977 *total = COSTS_N_INSNS (1);
20978 return true;
20980 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
20982 *total = rs6000_cost->fp;
20983 return false;
20985 break;
20987 case NE:
20988 case EQ:
20989 case GTU:
20990 case LTU:
20991 /* Carry bit requires mode == Pmode.
20992 NEG or PLUS already counted so only add one. */
20993 if (mode == Pmode
20994 && (outer_code == NEG || outer_code == PLUS))
20996 *total = COSTS_N_INSNS (1);
20997 return true;
20999 /* FALLTHRU */
21001 case GT:
21002 case LT:
21003 case UNORDERED:
21004 if (outer_code == SET)
21006 if (XEXP (x, 1) == const0_rtx)
21008 *total = COSTS_N_INSNS (2);
21009 return true;
21011 else
21013 *total = COSTS_N_INSNS (3);
21014 return false;
21017 /* CC COMPARE. */
21018 if (outer_code == COMPARE)
21020 *total = 0;
21021 return true;
21023 break;
21025 default:
21026 break;
21029 return false;
21032 /* Debug form of r6000_rtx_costs that is selected if -mdebug=cost. */
21034 static bool
21035 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
21036 int opno, int *total, bool speed)
21038 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
21040 fprintf (stderr,
21041 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
21042 "opno = %d, total = %d, speed = %s, x:\n",
21043 ret ? "complete" : "scan inner",
21044 GET_MODE_NAME (mode),
21045 GET_RTX_NAME (outer_code),
21046 opno,
21047 *total,
21048 speed ? "true" : "false");
21050 debug_rtx (x);
21052 return ret;
21055 static int
21056 rs6000_insn_cost (rtx_insn *insn, bool speed)
21058 if (recog_memoized (insn) < 0)
21059 return 0;
21061 /* If we are optimizing for size, just use the length. */
21062 if (!speed)
21063 return get_attr_length (insn);
21065 /* Use the cost if provided. */
21066 int cost = get_attr_cost (insn);
21067 if (cost > 0)
21068 return cost;
21070 /* If the insn tells us how many insns there are, use that. Otherwise use
21071 the length/4. Adjust the insn length to remove the extra size that
21072 prefixed instructions take. */
21073 int n = get_attr_num_insns (insn);
21074 if (n == 0)
21076 int length = get_attr_length (insn);
21077 if (get_attr_prefixed (insn) == PREFIXED_YES)
21079 int adjust = 0;
21080 ADJUST_INSN_LENGTH (insn, adjust);
21081 length -= adjust;
21084 n = length / 4;
21087 enum attr_type type = get_attr_type (insn);
21089 switch (type)
21091 case TYPE_LOAD:
21092 case TYPE_FPLOAD:
21093 case TYPE_VECLOAD:
21094 cost = COSTS_N_INSNS (n + 1);
21095 break;
21097 case TYPE_MUL:
21098 switch (get_attr_size (insn))
21100 case SIZE_8:
21101 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
21102 break;
21103 case SIZE_16:
21104 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
21105 break;
21106 case SIZE_32:
21107 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
21108 break;
21109 case SIZE_64:
21110 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
21111 break;
21112 default:
21113 gcc_unreachable ();
21115 break;
21116 case TYPE_DIV:
21117 switch (get_attr_size (insn))
21119 case SIZE_32:
21120 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
21121 break;
21122 case SIZE_64:
21123 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
21124 break;
21125 default:
21126 gcc_unreachable ();
21128 break;
21130 case TYPE_FP:
21131 cost = n * rs6000_cost->fp;
21132 break;
21133 case TYPE_DMUL:
21134 cost = n * rs6000_cost->dmul;
21135 break;
21136 case TYPE_SDIV:
21137 cost = n * rs6000_cost->sdiv;
21138 break;
21139 case TYPE_DDIV:
21140 cost = n * rs6000_cost->ddiv;
21141 break;
21143 case TYPE_SYNC:
21144 case TYPE_LOAD_L:
21145 case TYPE_MFCR:
21146 case TYPE_MFCRF:
21147 cost = COSTS_N_INSNS (n + 2);
21148 break;
21150 default:
21151 cost = COSTS_N_INSNS (n);
21154 return cost;
21157 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
21159 static int
21160 rs6000_debug_address_cost (rtx x, machine_mode mode,
21161 addr_space_t as, bool speed)
21163 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
21165 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
21166 ret, speed ? "true" : "false");
21167 debug_rtx (x);
21169 return ret;
21173 /* A C expression returning the cost of moving data from a register of class
21174 CLASS1 to one of CLASS2. */
21176 static int
21177 rs6000_register_move_cost (machine_mode mode,
21178 reg_class_t from, reg_class_t to)
21180 int ret;
21181 reg_class_t rclass;
21183 if (TARGET_DEBUG_COST)
21184 dbg_cost_ctrl++;
21186 /* If we have VSX, we can easily move between FPR or Altivec registers,
21187 otherwise we can only easily move within classes.
21188 Do this first so we give best-case answers for union classes
21189 containing both gprs and vsx regs. */
21190 HARD_REG_SET to_vsx, from_vsx;
21191 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
21192 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
21193 if (!hard_reg_set_empty_p (to_vsx)
21194 && !hard_reg_set_empty_p (from_vsx)
21195 && (TARGET_VSX
21196 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
21198 int reg = FIRST_FPR_REGNO;
21199 if (TARGET_VSX
21200 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
21201 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
21202 reg = FIRST_ALTIVEC_REGNO;
21203 ret = 2 * hard_regno_nregs (reg, mode);
21206 /* Moves from/to GENERAL_REGS. */
21207 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
21208 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
21210 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
21212 if (TARGET_DIRECT_MOVE)
21214 /* Keep the cost for direct moves above that for within
21215 a register class even if the actual processor cost is
21216 comparable. We do this because a direct move insn
21217 can't be a nop, whereas with ideal register
21218 allocation a move within the same class might turn
21219 out to be a nop. */
21220 if (rs6000_tune == PROCESSOR_POWER9
21221 || rs6000_tune == PROCESSOR_FUTURE)
21222 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21223 else
21224 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21225 /* SFmode requires a conversion when moving between gprs
21226 and vsx. */
21227 if (mode == SFmode)
21228 ret += 2;
21230 else
21231 ret = (rs6000_memory_move_cost (mode, rclass, false)
21232 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
21235 /* It's more expensive to move CR_REGS than CR0_REGS because of the
21236 shift. */
21237 else if (rclass == CR_REGS)
21238 ret = 4;
21240 /* For those processors that have slow LR/CTR moves, make them more
21241 expensive than memory in order to bias spills to memory .*/
21242 else if ((rs6000_tune == PROCESSOR_POWER6
21243 || rs6000_tune == PROCESSOR_POWER7
21244 || rs6000_tune == PROCESSOR_POWER8
21245 || rs6000_tune == PROCESSOR_POWER9)
21246 && reg_class_subset_p (rclass, SPECIAL_REGS))
21247 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21249 else
21250 /* A move will cost one instruction per GPR moved. */
21251 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21254 /* Everything else has to go through GENERAL_REGS. */
21255 else
21256 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
21257 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
21259 if (TARGET_DEBUG_COST)
21261 if (dbg_cost_ctrl == 1)
21262 fprintf (stderr,
21263 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
21264 ret, GET_MODE_NAME (mode), reg_class_names[from],
21265 reg_class_names[to]);
21266 dbg_cost_ctrl--;
21269 return ret;
21272 /* A C expressions returning the cost of moving data of MODE from a register to
21273 or from memory. */
21275 static int
21276 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
21277 bool in ATTRIBUTE_UNUSED)
21279 int ret;
21281 if (TARGET_DEBUG_COST)
21282 dbg_cost_ctrl++;
21284 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
21285 ret = 4 * hard_regno_nregs (0, mode);
21286 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
21287 || reg_classes_intersect_p (rclass, VSX_REGS)))
21288 ret = 4 * hard_regno_nregs (32, mode);
21289 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
21290 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
21291 else
21292 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
21294 if (TARGET_DEBUG_COST)
21296 if (dbg_cost_ctrl == 1)
21297 fprintf (stderr,
21298 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
21299 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
21300 dbg_cost_ctrl--;
21303 return ret;
21306 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
21308 The register allocator chooses GEN_OR_VSX_REGS for the allocno
21309 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
21310 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
21311 move cost between GENERAL_REGS and VSX_REGS low.
21313 It might seem reasonable to use a union class. After all, if usage
21314 of vsr is low and gpr high, it might make sense to spill gpr to vsr
21315 rather than memory. However, in cases where register pressure of
21316 both is high, like the cactus_adm spec test, allowing
21317 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
21318 the first scheduling pass. This is partly due to an allocno of
21319 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
21320 class, which gives too high a pressure for GENERAL_REGS and too low
21321 for VSX_REGS. So, force a choice of the subclass here.
21323 The best class is also the union if GENERAL_REGS and VSX_REGS have
21324 the same cost. In that case we do use GEN_OR_VSX_REGS as the
21325 allocno class, since trying to narrow down the class by regno mode
21326 is prone to error. For example, SImode is allowed in VSX regs and
21327 in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
21328 it would be wrong to choose an allocno of GENERAL_REGS based on
21329 SImode. */
21331 static reg_class_t
21332 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
21333 reg_class_t allocno_class,
21334 reg_class_t best_class)
21336 switch (allocno_class)
21338 case GEN_OR_VSX_REGS:
21339 /* best_class must be a subset of allocno_class. */
21340 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
21341 || best_class == GEN_OR_FLOAT_REGS
21342 || best_class == VSX_REGS
21343 || best_class == ALTIVEC_REGS
21344 || best_class == FLOAT_REGS
21345 || best_class == GENERAL_REGS
21346 || best_class == BASE_REGS);
21347 /* Use best_class but choose wider classes when copying from the
21348 wider class to best_class is cheap. This mimics IRA choice
21349 of allocno class. */
21350 if (best_class == BASE_REGS)
21351 return GENERAL_REGS;
21352 if (TARGET_VSX
21353 && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
21354 return VSX_REGS;
21355 return best_class;
21357 default:
21358 break;
21361 return allocno_class;
21364 /* Returns a code for a target-specific builtin that implements
21365 reciprocal of the function, or NULL_TREE if not available. */
21367 static tree
21368 rs6000_builtin_reciprocal (tree fndecl)
21370 switch (DECL_MD_FUNCTION_CODE (fndecl))
21372 case VSX_BUILTIN_XVSQRTDP:
21373 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
21374 return NULL_TREE;
21376 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
21378 case VSX_BUILTIN_XVSQRTSP:
21379 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
21380 return NULL_TREE;
21382 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
21384 default:
21385 return NULL_TREE;
21389 /* Load up a constant. If the mode is a vector mode, splat the value across
21390 all of the vector elements. */
21392 static rtx
21393 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
21395 rtx reg;
21397 if (mode == SFmode || mode == DFmode)
21399 rtx d = const_double_from_real_value (dconst, mode);
21400 reg = force_reg (mode, d);
21402 else if (mode == V4SFmode)
21404 rtx d = const_double_from_real_value (dconst, SFmode);
21405 rtvec v = gen_rtvec (4, d, d, d, d);
21406 reg = gen_reg_rtx (mode);
21407 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21409 else if (mode == V2DFmode)
21411 rtx d = const_double_from_real_value (dconst, DFmode);
21412 rtvec v = gen_rtvec (2, d, d);
21413 reg = gen_reg_rtx (mode);
21414 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21416 else
21417 gcc_unreachable ();
21419 return reg;
21422 /* Generate an FMA instruction. */
21424 static void
21425 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
21427 machine_mode mode = GET_MODE (target);
21428 rtx dst;
21430 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
21431 gcc_assert (dst != NULL);
21433 if (dst != target)
21434 emit_move_insn (target, dst);
21437 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
21439 static void
21440 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
21442 machine_mode mode = GET_MODE (dst);
21443 rtx r;
21445 /* This is a tad more complicated, since the fnma_optab is for
21446 a different expression: fma(-m1, m2, a), which is the same
21447 thing except in the case of signed zeros.
21449 Fortunately we know that if FMA is supported that FNMSUB is
21450 also supported in the ISA. Just expand it directly. */
21452 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
21454 r = gen_rtx_NEG (mode, a);
21455 r = gen_rtx_FMA (mode, m1, m2, r);
21456 r = gen_rtx_NEG (mode, r);
21457 emit_insn (gen_rtx_SET (dst, r));
21460 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
21461 add a reg_note saying that this was a division. Support both scalar and
21462 vector divide. Assumes no trapping math and finite arguments. */
21464 void
21465 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
21467 machine_mode mode = GET_MODE (dst);
21468 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
21469 int i;
21471 /* Low precision estimates guarantee 5 bits of accuracy. High
21472 precision estimates guarantee 14 bits of accuracy. SFmode
21473 requires 23 bits of accuracy. DFmode requires 52 bits of
21474 accuracy. Each pass at least doubles the accuracy, leading
21475 to the following. */
21476 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
21477 if (mode == DFmode || mode == V2DFmode)
21478 passes++;
21480 enum insn_code code = optab_handler (smul_optab, mode);
21481 insn_gen_fn gen_mul = GEN_FCN (code);
21483 gcc_assert (code != CODE_FOR_nothing);
21485 one = rs6000_load_constant_and_splat (mode, dconst1);
21487 /* x0 = 1./d estimate */
21488 x0 = gen_reg_rtx (mode);
21489 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
21490 UNSPEC_FRES)));
21492 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
21493 if (passes > 1) {
21495 /* e0 = 1. - d * x0 */
21496 e0 = gen_reg_rtx (mode);
21497 rs6000_emit_nmsub (e0, d, x0, one);
21499 /* x1 = x0 + e0 * x0 */
21500 x1 = gen_reg_rtx (mode);
21501 rs6000_emit_madd (x1, e0, x0, x0);
21503 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
21504 ++i, xprev = xnext, eprev = enext) {
21506 /* enext = eprev * eprev */
21507 enext = gen_reg_rtx (mode);
21508 emit_insn (gen_mul (enext, eprev, eprev));
21510 /* xnext = xprev + enext * xprev */
21511 xnext = gen_reg_rtx (mode);
21512 rs6000_emit_madd (xnext, enext, xprev, xprev);
21515 } else
21516 xprev = x0;
21518 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
21520 /* u = n * xprev */
21521 u = gen_reg_rtx (mode);
21522 emit_insn (gen_mul (u, n, xprev));
21524 /* v = n - (d * u) */
21525 v = gen_reg_rtx (mode);
21526 rs6000_emit_nmsub (v, d, u, n);
21528 /* dst = (v * xprev) + u */
21529 rs6000_emit_madd (dst, v, xprev, u);
21531 if (note_p)
21532 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
/* Goldschmidt's Algorithm for single/double-precision floating point
   sqrt and rsqrt.  Assumes no trapping math and finite arguments.

   DST receives either sqrt(SRC) (RECIP false) or 1/sqrt(SRC) (RECIP
   true).  SRC and DST share the same scalar or vector float mode.  */

void
rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
{
  machine_mode mode = GET_MODE (src);
  rtx e = gen_reg_rtx (mode);	/* rsqrt estimate */
  rtx g = gen_reg_rtx (mode);	/* sqrt estimate */
  rtx h = gen_reg_rtx (mode);	/* 1/(2*sqrt) estimate */

  /* Low precision estimates guarantee 5 bits of accuracy.  High
     precision estimates guarantee 14 bits of accuracy.  SFmode
     requires 23 bits of accuracy.  DFmode requires 52 bits of
     accuracy.  Each pass at least doubles the accuracy, leading
     to the following.  */
  int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
  if (mode == DFmode || mode == V2DFmode)
    passes++;

  int i;
  rtx mhalf;
  enum insn_code code = optab_handler (smul_optab, mode);
  insn_gen_fn gen_mul = GEN_FCN (code);

  gcc_assert (code != CODE_FOR_nothing);

  mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);

  /* e = rsqrt estimate */
  emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
					     UNSPEC_RSQRT)));

  /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero = force_reg (mode, CONST0_RTX (mode));

      if (mode == SFmode)
	{
	  /* Scalar: conditional move keeps E only when SRC > 0,
	     otherwise substitutes zero.  */
	  rtx target = emit_conditional_move (e, GT, src, zero, mode,
					      e, zero, mode, 0);
	  if (target != e)
	    emit_move_insn (e, target);
	}
      else
	{
	  /* Vector: per-lane select between E and zero.  */
	  rtx cond = gen_rtx_GT (VOIDmode, e, zero);
	  rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
	}
    }

  /* g = sqrt estimate.  */
  emit_insn (gen_mul (g, e, src));
  /* h = 1/(2*sqrt) estimate.  */
  emit_insn (gen_mul (h, e, mhalf));

  if (recip)
    {
      if (passes == 1)
	{
	  rtx t = gen_reg_rtx (mode);
	  rs6000_emit_nmsub (t, g, h, mhalf);
	  /* Apply correction directly to 1/rsqrt estimate.  */
	  rs6000_emit_madd (dst, e, t, e);
	}
      else
	{
	  /* Refine G and H together, then double H to get 1/sqrt.  */
	  for (i = 0; i < passes; i++)
	    {
	      rtx t1 = gen_reg_rtx (mode);
	      rtx g1 = gen_reg_rtx (mode);
	      rtx h1 = gen_reg_rtx (mode);

	      rs6000_emit_nmsub (t1, g, h, mhalf);
	      rs6000_emit_madd (g1, g, t1, g);
	      rs6000_emit_madd (h1, h, t1, h);

	      g = g1;
	      h = h1;
	    }
	  /* Multiply by 2 for 1/rsqrt.  */
	  emit_insn (gen_add3_insn (dst, h, h));
	}
    }
  else
    {
      /* One final correction step for sqrt itself.  */
      rtx t = gen_reg_rtx (mode);
      rs6000_emit_nmsub (t, g, h, mhalf);
      rs6000_emit_madd (dst, g, t, g);
    }

  return;
}
21630 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
21631 (Power7) targets. DST is the target, and SRC is the argument operand. */
21633 void
21634 rs6000_emit_popcount (rtx dst, rtx src)
21636 machine_mode mode = GET_MODE (dst);
21637 rtx tmp1, tmp2;
21639 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
21640 if (TARGET_POPCNTD)
21642 if (mode == SImode)
21643 emit_insn (gen_popcntdsi2 (dst, src));
21644 else
21645 emit_insn (gen_popcntddi2 (dst, src));
21646 return;
21649 tmp1 = gen_reg_rtx (mode);
21651 if (mode == SImode)
21653 emit_insn (gen_popcntbsi2 (tmp1, src));
21654 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
21655 NULL_RTX, 0);
21656 tmp2 = force_reg (SImode, tmp2);
21657 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
21659 else
21661 emit_insn (gen_popcntbdi2 (tmp1, src));
21662 tmp2 = expand_mult (DImode, tmp1,
21663 GEN_INT ((HOST_WIDE_INT)
21664 0x01010101 << 32 | 0x01010101),
21665 NULL_RTX, 0);
21666 tmp2 = force_reg (DImode, tmp2);
21667 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
21672 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
21673 target, and SRC is the argument operand. */
21675 void
21676 rs6000_emit_parity (rtx dst, rtx src)
21678 machine_mode mode = GET_MODE (dst);
21679 rtx tmp;
21681 tmp = gen_reg_rtx (mode);
21683 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
21684 if (TARGET_CMPB)
21686 if (mode == SImode)
21688 emit_insn (gen_popcntbsi2 (tmp, src));
21689 emit_insn (gen_paritysi2_cmpb (dst, tmp));
21691 else
21693 emit_insn (gen_popcntbdi2 (tmp, src));
21694 emit_insn (gen_paritydi2_cmpb (dst, tmp));
21696 return;
21699 if (mode == SImode)
21701 /* Is mult+shift >= shift+xor+shift+xor? */
21702 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
21704 rtx tmp1, tmp2, tmp3, tmp4;
21706 tmp1 = gen_reg_rtx (SImode);
21707 emit_insn (gen_popcntbsi2 (tmp1, src));
21709 tmp2 = gen_reg_rtx (SImode);
21710 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
21711 tmp3 = gen_reg_rtx (SImode);
21712 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
21714 tmp4 = gen_reg_rtx (SImode);
21715 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
21716 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
21718 else
21719 rs6000_emit_popcount (tmp, src);
21720 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
21722 else
21724 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
21725 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
21727 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
21729 tmp1 = gen_reg_rtx (DImode);
21730 emit_insn (gen_popcntbdi2 (tmp1, src));
21732 tmp2 = gen_reg_rtx (DImode);
21733 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
21734 tmp3 = gen_reg_rtx (DImode);
21735 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
21737 tmp4 = gen_reg_rtx (DImode);
21738 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
21739 tmp5 = gen_reg_rtx (DImode);
21740 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
21742 tmp6 = gen_reg_rtx (DImode);
21743 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
21744 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
21746 else
21747 rs6000_emit_popcount (tmp, src);
21748 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
21752 /* Expand an Altivec constant permutation for little endian mode.
21753 OP0 and OP1 are the input vectors and TARGET is the output vector.
21754 SEL specifies the constant permutation vector.
21756 There are two issues: First, the two input operands must be
21757 swapped so that together they form a double-wide array in LE
21758 order. Second, the vperm instruction has surprising behavior
21759 in LE mode: it interprets the elements of the source vectors
21760 in BE mode ("left to right") and interprets the elements of
21761 the destination vector in LE mode ("right to left"). To
21762 correct for this, we must subtract each element of the permute
21763 control vector from 31.
21765 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
21766 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
21767 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
21768 serve as the permute control vector. Then, in BE mode,
21770 vperm 9,10,11,12
21772 places the desired result in vr9. However, in LE mode the
21773 vector contents will be
21775 vr10 = 00000003 00000002 00000001 00000000
21776 vr11 = 00000007 00000006 00000005 00000004
21778 The result of the vperm using the same permute control vector is
21780 vr9 = 05000000 07000000 01000000 03000000
21782 That is, the leftmost 4 bytes of vr10 are interpreted as the
21783 source for the rightmost 4 bytes of vr9, and so on.
21785 If we change the permute control vector to
21787    vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
21789 and issue
21791 vperm 9,11,10,12
21793 we get the desired
21795 vr9 = 00000006 00000004 00000002 00000000. */
21797 static void
21798 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
21799 const vec_perm_indices &sel)
21801 unsigned int i;
21802 rtx perm[16];
21803 rtx constv, unspec;
21805 /* Unpack and adjust the constant selector. */
21806 for (i = 0; i < 16; ++i)
21808 unsigned int elt = 31 - (sel[i] & 31);
21809 perm[i] = GEN_INT (elt);
21812 /* Expand to a permute, swapping the inputs and using the
21813 adjusted selector. */
21814 if (!REG_P (op0))
21815 op0 = force_reg (V16QImode, op0);
21816 if (!REG_P (op1))
21817 op1 = force_reg (V16QImode, op1);
21819 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
21820 constv = force_reg (V16QImode, constv);
21821 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
21822 UNSPEC_VPERM);
21823 if (!REG_P (target))
21825 rtx tmp = gen_reg_rtx (V16QImode);
21826 emit_move_insn (tmp, unspec);
21827 unspec = tmp;
21830 emit_move_insn (target, unspec);
21833 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
21834 permute control vector. But here it's not a constant, so we must
21835 generate a vector NAND or NOR to do the adjustment. */
21837 void
21838 altivec_expand_vec_perm_le (rtx operands[4])
21840 rtx notx, iorx, unspec;
21841 rtx target = operands[0];
21842 rtx op0 = operands[1];
21843 rtx op1 = operands[2];
21844 rtx sel = operands[3];
21845 rtx tmp = target;
21846 rtx norreg = gen_reg_rtx (V16QImode);
21847 machine_mode mode = GET_MODE (target);
21849 /* Get everything in regs so the pattern matches. */
21850 if (!REG_P (op0))
21851 op0 = force_reg (mode, op0);
21852 if (!REG_P (op1))
21853 op1 = force_reg (mode, op1);
21854 if (!REG_P (sel))
21855 sel = force_reg (V16QImode, sel);
21856 if (!REG_P (target))
21857 tmp = gen_reg_rtx (mode);
21859 if (TARGET_P9_VECTOR)
21861 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
21862 UNSPEC_VPERMR);
21864 else
21866 /* Invert the selector with a VNAND if available, else a VNOR.
21867 The VNAND is preferred for future fusion opportunities. */
21868 notx = gen_rtx_NOT (V16QImode, sel);
21869 iorx = (TARGET_P8_VECTOR
21870 ? gen_rtx_IOR (V16QImode, notx, notx)
21871 : gen_rtx_AND (V16QImode, notx, notx));
21872 emit_insn (gen_rtx_SET (norreg, iorx));
21874 /* Permute with operands reversed and adjusted selector. */
21875 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
21876 UNSPEC_VPERM);
21879 /* Copy into target, possibly by way of a register. */
21880 if (!REG_P (target))
21882 emit_move_insn (tmp, unspec);
21883 unspec = tmp;
21886 emit_move_insn (target, unspec);
/* Expand an Altivec constant permutation.  Return true if we match
   an efficient implementation; false to fall back to VPERM.

   OP0 and OP1 are the input vectors and TARGET is the output vector.
   SEL specifies the constant permutation vector.  */

static bool
altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
			       const vec_perm_indices &sel)
{
  /* One table entry per single-instruction permute pattern: the ISA
     flag it requires, the insn to emit, and the byte-selector it
     implements (BE numbering, 0-15 from OP0, 16-31 from OP1).  */
  struct altivec_perm_insn {
    HOST_WIDE_INT mask;
    enum insn_code impl;
    unsigned char perm[16];
  };
  static const struct altivec_perm_insn patterns[] = {
    { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
      { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
    { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
      { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
       : CODE_FOR_altivec_vmrglb_direct),
      { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
       : CODE_FOR_altivec_vmrglh_direct),
      { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
       : CODE_FOR_altivec_vmrglw_direct),
      { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
       : CODE_FOR_altivec_vmrghb_direct),
      { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
       : CODE_FOR_altivec_vmrghh_direct),
      { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
       : CODE_FOR_altivec_vmrghw_direct),
      { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
    { OPTION_MASK_P8_VECTOR,
      (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
       : CODE_FOR_p8_vmrgow_v4sf_direct),
      { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
    { OPTION_MASK_P8_VECTOR,
      (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
       : CODE_FOR_p8_vmrgew_v4sf_direct),
      { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
  };

  unsigned int i, j, elt, which;
  unsigned char perm[16];
  rtx x;
  bool one_vec;

  /* Unpack the constant selector.  WHICH accumulates bit 0 if any
     element selects from OP0 (index < 16) and bit 1 if any selects
     from OP1.  */
  for (i = which = 0; i < 16; ++i)
    {
      elt = sel[i] & 31;
      which |= (elt < 16 ? 1 : 2);
      perm[i] = elt;
    }

  /* Simplify the constant selector based on operands.  */
  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      /* Both operands referenced; only a single-input problem if they
	 are the same rtx.  */
      one_vec = false;
      if (!rtx_equal_p (op0, op1))
	break;
      /* FALLTHRU */

    case 2:
      /* Only OP1 referenced: reduce indices to 0-15 and treat as a
	 one-vector permute of OP1.  */
      for (i = 0; i < 16; ++i)
	perm[i] &= 15;
      op0 = op1;
      one_vec = true;
      break;

    case 1:
      /* Only OP0 referenced.  */
      op1 = op0;
      one_vec = true;
      break;
    }

  /* Look for splat patterns.  */
  if (one_vec)
    {
      elt = perm[0];

      /* Byte splat: all 16 selector elements identical.  */
      for (i = 0; i < 16; ++i)
	if (perm[i] != elt)
	  break;
      if (i == 16)
	{
	  if (!BYTES_BIG_ENDIAN)
	    elt = 15 - elt;
	  emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
	  return true;
	}

      /* Halfword splat: pairs (elt, elt+1) repeated eight times.  */
      if (elt % 2 == 0)
	{
	  for (i = 0; i < 16; i += 2)
	    if (perm[i] != elt || perm[i + 1] != elt + 1)
	      break;
	  if (i == 16)
	    {
	      int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
	      x = gen_reg_rtx (V8HImode);
	      emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
						    GEN_INT (field)));
	      emit_move_insn (target, gen_lowpart (V16QImode, x));
	      return true;
	    }
	}

      /* Word splat: quadruples (elt..elt+3) repeated four times.  */
      if (elt % 4 == 0)
	{
	  for (i = 0; i < 16; i += 4)
	    if (perm[i] != elt
		|| perm[i + 1] != elt + 1
		|| perm[i + 2] != elt + 2
		|| perm[i + 3] != elt + 3)
	      break;
	  if (i == 16)
	    {
	      int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
	      x = gen_reg_rtx (V4SImode);
	      emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
						    GEN_INT (field)));
	      emit_move_insn (target, gen_lowpart (V16QImode, x));
	      return true;
	    }
	}
    }

  /* Look for merge and pack patterns.  */
  for (j = 0; j < ARRAY_SIZE (patterns); ++j)
    {
      bool swapped;

      if ((patterns[j].mask & rs6000_isa_flags) == 0)
	continue;

      /* The pattern may match directly, or with the two inputs
	 swapped (selector shifted by 16).  */
      elt = patterns[j].perm[0];
      if (perm[0] == elt)
	swapped = false;
      else if (perm[0] == elt + 16)
	swapped = true;
      else
	continue;
      for (i = 1; i < 16; ++i)
	{
	  elt = patterns[j].perm[i];
	  if (swapped)
	    elt = (elt >= 16 ? elt - 16 : elt + 16);
	  else if (one_vec && elt >= 16)
	    elt -= 16;
	  if (perm[i] != elt)
	    break;
	}
      if (i == 16)
	{
	  enum insn_code icode = patterns[j].impl;
	  machine_mode omode = insn_data[icode].operand[0].mode;
	  machine_mode imode = insn_data[icode].operand[1].mode;

	  /* For little-endian, don't use vpkuwum and vpkuhum if the
	     underlying vector type is not V4SI and V8HI, respectively.
	     For example, using vpkuwum with a V8HI picks up the even
	     halfwords (BE numbering) when the even halfwords (LE
	     numbering) are what we need.  */
	  if (!BYTES_BIG_ENDIAN
	      && icode == CODE_FOR_altivec_vpkuwum_direct
	      && ((REG_P (op0)
		   && GET_MODE (op0) != V4SImode)
		  || (SUBREG_P (op0)
		      && GET_MODE (XEXP (op0, 0)) != V4SImode)))
	    continue;
	  if (!BYTES_BIG_ENDIAN
	      && icode == CODE_FOR_altivec_vpkuhum_direct
	      && ((REG_P (op0)
		   && GET_MODE (op0) != V8HImode)
		  || (SUBREG_P (op0)
		      && GET_MODE (XEXP (op0, 0)) != V8HImode)))
	    continue;

	  /* For little-endian, the two input operands must be swapped
	     (or swapped back) to ensure proper right-to-left numbering
	     from 0 to 2N-1.  */
	  if (swapped ^ !BYTES_BIG_ENDIAN)
	    std::swap (op0, op1);
	  if (imode != V16QImode)
	    {
	      op0 = gen_lowpart (imode, op0);
	      op1 = gen_lowpart (imode, op1);
	    }
	  if (omode == V16QImode)
	    x = target;
	  else
	    x = gen_reg_rtx (omode);
	  emit_insn (GEN_FCN (icode) (x, op0, op1));
	  if (omode != V16QImode)
	    emit_move_insn (target, gen_lowpart (V16QImode, x));
	  return true;
	}
    }

  /* No single-insn match; on LE we can still emit a general vperm.  */
  if (!BYTES_BIG_ENDIAN)
    {
      altivec_expand_vec_perm_const_le (target, op0, op1, sel);
      return true;
    }
  return false;
}
/* Expand a VSX Permute Doubleword constant permutation.
   Return true if we match an efficient implementation.

   PERM0 and PERM1 each select one of the four doublewords of the
   concatenation {OP0, OP1}: bit 1 chooses the operand and bit 0 the
   doubleword within it.  With TARGET == NULL this only tests whether
   the permutation is expressible; no insns are emitted.  */

static bool
rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
				unsigned char perm0, unsigned char perm1)
{
  rtx x;

  /* If both selectors come from the same operand, fold to single op.  */
  if ((perm0 & 2) == (perm1 & 2))
    {
      if (perm0 & 2)
	op0 = op1;
      else
	op1 = op0;
    }
  /* If both operands are equal, fold to simpler permutation.  */
  if (rtx_equal_p (op0, op1))
    {
      perm0 = perm0 & 1;
      perm1 = (perm1 & 1) + 2;
    }
  /* If the first selector comes from the second operand, swap.  */
  else if (perm0 & 2)
    {
      if (perm1 & 2)
	return false;
      perm0 -= 2;
      perm1 += 2;
      std::swap (op0, op1);
    }
  /* If the second selector does not come from the second operand, fail.  */
  else if ((perm1 & 2) == 0)
    return false;

  /* Success!  Emit a VEC_SELECT of the doubled-width concatenation
     unless we were only asked whether the permutation is doable.  */
  if (target != NULL)
    {
      machine_mode vmode, dmode;
      rtvec v;

      vmode = GET_MODE (target);
      gcc_assert (GET_MODE_NUNITS (vmode) == 2);
      dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
      x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
      v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
      x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
      emit_insn (gen_rtx_SET (target, x));
    }
  return true;
}
22167 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
22169 static bool
22170 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
22171 rtx op1, const vec_perm_indices &sel)
22173 bool testing_p = !target;
22175 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
22176 if (TARGET_ALTIVEC && testing_p)
22177 return true;
22179 /* Check for ps_merge* or xxpermdi insns. */
22180 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
22182 if (testing_p)
22184 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
22185 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
22187 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
22188 return true;
22191 if (TARGET_ALTIVEC)
22193 /* Force the target-independent code to lower to V16QImode. */
22194 if (vmode != V16QImode)
22195 return false;
22196 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
22197 return true;
22200 return false;
22203 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
22204 OP0 and OP1 are the input vectors and TARGET is the output vector.
22205 PERM specifies the constant permutation vector. */
22207 static void
22208 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
22209 machine_mode vmode, const vec_perm_builder &perm)
22211 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
22212 if (x != target)
22213 emit_move_insn (target, x);
22216 /* Expand an extract even operation. */
22218 void
22219 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
22221 machine_mode vmode = GET_MODE (target);
22222 unsigned i, nelt = GET_MODE_NUNITS (vmode);
22223 vec_perm_builder perm (nelt, nelt, 1);
22225 for (i = 0; i < nelt; i++)
22226 perm.quick_push (i * 2);
22228 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22231 /* Expand a vector interleave operation. */
22233 void
22234 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
22236 machine_mode vmode = GET_MODE (target);
22237 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
22238 vec_perm_builder perm (nelt, nelt, 1);
22240 high = (highp ? 0 : nelt / 2);
22241 for (i = 0; i < nelt / 2; i++)
22243 perm.quick_push (i + high);
22244 perm.quick_push (i + nelt + high);
22247 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22250 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
22251 void
22252 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
22254 HOST_WIDE_INT hwi_scale (scale);
22255 REAL_VALUE_TYPE r_pow;
22256 rtvec v = rtvec_alloc (2);
22257 rtx elt;
22258 rtx scale_vec = gen_reg_rtx (V2DFmode);
22259 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
22260 elt = const_double_from_real_value (r_pow, DFmode);
22261 RTVEC_ELT (v, 0) = elt;
22262 RTVEC_ELT (v, 1) = elt;
22263 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
22264 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
22267 /* Return an RTX representing where to find the function value of a
22268 function returning MODE. */
22269 static rtx
22270 rs6000_complex_function_value (machine_mode mode)
22272 unsigned int regno;
22273 rtx r1, r2;
22274 machine_mode inner = GET_MODE_INNER (mode);
22275 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
22277 if (TARGET_FLOAT128_TYPE
22278 && (mode == KCmode
22279 || (mode == TCmode && TARGET_IEEEQUAD)))
22280 regno = ALTIVEC_ARG_RETURN;
22282 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22283 regno = FP_ARG_RETURN;
22285 else
22287 regno = GP_ARG_RETURN;
22289 /* 32-bit is OK since it'll go in r3/r4. */
22290 if (TARGET_32BIT && inner_bytes >= 4)
22291 return gen_rtx_REG (mode, regno);
22294 if (inner_bytes >= 8)
22295 return gen_rtx_REG (mode, regno);
22297 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
22298 const0_rtx);
22299 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
22300 GEN_INT (inner_bytes));
22301 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
22304 /* Return an rtx describing a return value of MODE as a PARALLEL
22305 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
22306 stride REG_STRIDE. */
22308 static rtx
22309 rs6000_parallel_return (machine_mode mode,
22310 int n_elts, machine_mode elt_mode,
22311 unsigned int regno, unsigned int reg_stride)
22313 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
22315 int i;
22316 for (i = 0; i < n_elts; i++)
22318 rtx r = gen_rtx_REG (elt_mode, regno);
22319 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
22320 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
22321 regno += reg_stride;
22324 return par;
/* Target hook for TARGET_FUNCTION_VALUE.

   An integer value is in r3 and a floating-point value is in fp1,
   unless -msoft-float.  */

static rtx
rs6000_function_value (const_tree valtype,
		       const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
		       bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  unsigned int regno;
  machine_mode elt_mode;
  int n_elts;

  /* Special handling for structs in darwin64.  */
  if (TARGET_MACHO
      && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
    {
      CUMULATIVE_ARGS valcum;
      rtx valret;

      valcum.words = 0;
      valcum.fregno = FP_ARG_MIN_REG;
      valcum.vregno = ALTIVEC_ARG_MIN_REG;
      /* Do a trial code generation as if this were going to be passed as
	 an argument; if any part goes in memory, we return NULL.  */
      valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
      if (valret)
	return valret;
      /* Otherwise fall through to standard ABI rules.  */
    }

  mode = TYPE_MODE (valtype);

  /* The ELFv2 ABI returns homogeneous VFP aggregates in registers.  */
  if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
    {
      int first_reg, n_regs;

      if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
	{
	  /* _Decimal128 must use even/odd register pairs.  */
	  first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
	  n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
	}
      else
	{
	  first_reg = ALTIVEC_ARG_RETURN;
	  n_regs = 1;
	}

      return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
    }

  /* Some return value types need be split in -mpowerpc64, 32bit ABI.  */
  if (TARGET_32BIT && TARGET_POWERPC64)
    switch (mode)
      {
      default:
	break;
      case E_DImode:
      case E_SCmode:
      case E_DCmode:
      case E_TCmode:
	/* Split into 4-byte pieces across consecutive GPRs.  */
	int count = GET_MODE_SIZE (mode) / 4;
	return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
      }

  /* Promote sub-word integers and pointers to the full word mode.  */
  if ((INTEGRAL_TYPE_P (valtype)
       && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
      || POINTER_TYPE_P (valtype))
    mode = TARGET_32BIT ? SImode : DImode;

  if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
    /* _Decimal128 must use an even/odd register pair.  */
    regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
  else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
	   && !FLOAT128_VECTOR_P (mode))
    regno = FP_ARG_RETURN;
  else if (TREE_CODE (valtype) == COMPLEX_TYPE
	   && targetm.calls.split_complex_arg)
    return rs6000_complex_function_value (mode);
  /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
     return register is used in both cases, and we won't see V2DImode/V2DFmode
     for pure altivec, combine the two cases.  */
  else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
	   && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
	   && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
    regno = ALTIVEC_ARG_RETURN;
  else
    regno = GP_ARG_RETURN;

  return gen_rtx_REG (mode, regno);
}
22423 /* Define how to find the value returned by a library function
22424 assuming the value has mode MODE. */
22426 rs6000_libcall_value (machine_mode mode)
22428 unsigned int regno;
22430 /* Long long return value need be split in -mpowerpc64, 32bit ABI. */
22431 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
22432 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
22434 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22435 /* _Decimal128 must use an even/odd register pair. */
22436 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22437 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
22438 regno = FP_ARG_RETURN;
22439 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22440 return register is used in both cases, and we won't see V2DImode/V2DFmode
22441 for pure altivec, combine the two cases. */
22442 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
22443 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
22444 regno = ALTIVEC_ARG_RETURN;
22445 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
22446 return rs6000_complex_function_value (mode);
22447 else
22448 regno = GP_ARG_RETURN;
22450 return gen_rtx_REG (mode, regno);
22453 /* Compute register pressure classes. We implement the target hook to avoid
22454 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
22455    lead to incorrect estimates of number of available registers and therefore
22456 increased register pressure/spill. */
22457 static int
22458 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
22460 int n;
22462 n = 0;
22463 pressure_classes[n++] = GENERAL_REGS;
22464 if (TARGET_VSX)
22465 pressure_classes[n++] = VSX_REGS;
22466 else
22468 if (TARGET_ALTIVEC)
22469 pressure_classes[n++] = ALTIVEC_REGS;
22470 if (TARGET_HARD_FLOAT)
22471 pressure_classes[n++] = FLOAT_REGS;
22473 pressure_classes[n++] = CR_REGS;
22474 pressure_classes[n++] = SPECIAL_REGS;
22476 return n;
22479 /* Given FROM and TO register numbers, say whether this elimination is allowed.
22480 Frame pointer elimination is automatically handled.
22482 For the RS/6000, if frame pointer elimination is being done, we would like
22483 to convert ap into fp, not sp.
22485 We need r30 if -mminimal-toc was specified, and there are constant pool
22486 references. */
22488 static bool
22489 rs6000_can_eliminate (const int from, const int to)
22491 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
22492 ? ! frame_pointer_needed
22493 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
22494 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
22495 || constant_pool_empty_p ()
22496 : true);
22499 /* Define the offset between two registers, FROM to be eliminated and its
22500 replacement TO, at the start of a routine. */
22501 HOST_WIDE_INT
22502 rs6000_initial_elimination_offset (int from, int to)
22504 rs6000_stack_t *info = rs6000_stack_info ();
22505 HOST_WIDE_INT offset;
22507 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22508 offset = info->push_p ? 0 : -info->total_size;
22509 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22511 offset = info->push_p ? 0 : -info->total_size;
22512 if (FRAME_GROWS_DOWNWARD)
22513 offset += info->fixed_size + info->vars_size + info->parm_size;
22515 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22516 offset = FRAME_GROWS_DOWNWARD
22517 ? info->fixed_size + info->vars_size + info->parm_size
22518 : 0;
22519 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22520 offset = info->total_size;
22521 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22522 offset = info->push_p ? info->total_size : 0;
22523 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
22524 offset = 0;
22525 else
22526 gcc_unreachable ();
22528 return offset;
/* Fill in sizes of registers used by unwinder.  ADDRESS is a tree
   expression for the base of the reg-size table being built.  */

static void
rs6000_init_dwarf_reg_sizes_extra (tree address)
{
  if (TARGET_MACHO && ! TARGET_ALTIVEC)
    {
      int i;
      machine_mode mode = TYPE_MODE (char_type_node);
      rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
      rtx mem = gen_rtx_MEM (BLKmode, addr);
      /* Each AltiVec register is 16 bytes wide.  */
      rtx value = gen_int_mode (16, mode);

      /* On Darwin, libgcc may be built to run on both G3 and G4/5.
	 The unwinder still needs to know the size of Altivec registers.  */

      for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
	{
	  /* Store the size at the unwind column corresponding to this
	     register, scaled by the element size of the table.  */
	  int column = DWARF_REG_TO_UNWIND_COLUMN
		(DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
	  HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);

	  emit_move_insn (adjust_address (mem, mode, offset), value);
	}
    }
}
/* Map internal gcc register numbers to debug format register numbers.
   FORMAT specifies the type of debug register number to use:
     0 -- debug information, except for frame-related sections
     1 -- DWARF .debug_frame section
     2 -- DWARF .eh_frame section  */

unsigned int
rs6000_dbx_register_number (unsigned int regno, unsigned int format)
{
  /* On some platforms, we use the standard DWARF register
     numbering for .debug_info and .debug_frame.  */
  if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
    {
#ifdef RS6000_USE_DWARF_NUMBERING
      /* GPRs map to DWARF regs 0-31 directly.  */
      if (regno <= 31)
	return regno;
      /* FPRs occupy DWARF regs 32-63.  */
      if (FP_REGNO_P (regno))
	return regno - FIRST_FPR_REGNO + 32;
      if (ALTIVEC_REGNO_P (regno))
	return regno - FIRST_ALTIVEC_REGNO + 1124;
      if (regno == LR_REGNO)
	return 108;
      if (regno == CTR_REGNO)
	return 109;
      if (regno == CA_REGNO)
	return 101;  /* XER */
      /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
	 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
	 The actual code emitted saves the whole of CR, so we map CR2_REGNO
	 to the DWARF reg for CR.  */
      if (format == 1 && regno == CR2_REGNO)
	return 64;
      if (CR_REGNO_P (regno))
	return regno - CR0_REGNO + 86;
      if (regno == VRSAVE_REGNO)
	return 356;
      if (regno == VSCR_REGNO)
	return 67;

      /* These do not make much sense.  */
      if (regno == FRAME_POINTER_REGNUM)
	return 111;
      if (regno == ARG_POINTER_REGNUM)
	return 67;
      if (regno == 64)
	return 100;

      gcc_unreachable ();
#endif
    }

  /* We use the GCC 7 (and before) internal number for non-DWARF debug
     information, and also for .eh_frame.  */
  /* Translate the regnos to their numbers in GCC 7 (and before).  */
  if (regno <= 31)
    return regno;
  if (FP_REGNO_P (regno))
    return regno - FIRST_FPR_REGNO + 32;
  if (ALTIVEC_REGNO_P (regno))
    return regno - FIRST_ALTIVEC_REGNO + 77;
  if (regno == LR_REGNO)
    return 65;
  if (regno == CTR_REGNO)
    return 66;
  if (regno == CA_REGNO)
    return 76;  /* XER */
  if (CR_REGNO_P (regno))
    return regno - CR0_REGNO + 68;
  if (regno == VRSAVE_REGNO)
    return 109;
  if (regno == VSCR_REGNO)
    return 110;

  if (regno == FRAME_POINTER_REGNUM)
    return 111;
  if (regno == ARG_POINTER_REGNUM)
    return 67;
  if (regno == 64)
    return 64;

  gcc_unreachable ();
}
22641 /* target hook eh_return_filter_mode */
22642 static scalar_int_mode
22643 rs6000_eh_return_filter_mode (void)
22645 return TARGET_32BIT ? SImode : word_mode;
22648 /* Target hook for translate_mode_attribute. */
22649 static machine_mode
22650 rs6000_translate_mode_attribute (machine_mode mode)
22652 if ((FLOAT128_IEEE_P (mode)
22653 && ieee128_float_type_node == long_double_type_node)
22654 || (FLOAT128_IBM_P (mode)
22655 && ibm128_float_type_node == long_double_type_node))
22656 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
22657 return mode;
22660 /* Target hook for scalar_mode_supported_p. */
22661 static bool
22662 rs6000_scalar_mode_supported_p (scalar_mode mode)
22664 /* -m32 does not support TImode. This is the default, from
22665 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
22666 same ABI as for -m32. But default_scalar_mode_supported_p allows
22667 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
22668 for -mpowerpc64. */
22669 if (TARGET_32BIT && mode == TImode)
22670 return false;
22672 if (DECIMAL_FLOAT_MODE_P (mode))
22673 return default_decimal_float_supported_p ();
22674 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
22675 return true;
22676 else
22677 return default_scalar_mode_supported_p (mode);
22680 /* Target hook for vector_mode_supported_p. */
22681 static bool
22682 rs6000_vector_mode_supported_p (machine_mode mode)
22684 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
22685 128-bit, the compiler might try to widen IEEE 128-bit to IBM
22686 double-double. */
22687 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
22688 return true;
22690 else
22691 return false;
22694 /* Target hook for floatn_mode. */
22695 static opt_scalar_float_mode
22696 rs6000_floatn_mode (int n, bool extended)
22698 if (extended)
22700 switch (n)
22702 case 32:
22703 return DFmode;
22705 case 64:
22706 if (TARGET_FLOAT128_TYPE)
22707 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22708 else
22709 return opt_scalar_float_mode ();
22711 case 128:
22712 return opt_scalar_float_mode ();
22714 default:
22715 /* Those are the only valid _FloatNx types. */
22716 gcc_unreachable ();
22719 else
22721 switch (n)
22723 case 32:
22724 return SFmode;
22726 case 64:
22727 return DFmode;
22729 case 128:
22730 if (TARGET_FLOAT128_TYPE)
22731 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22732 else
22733 return opt_scalar_float_mode ();
22735 default:
22736 return opt_scalar_float_mode ();
22742 /* Target hook for c_mode_for_suffix. */
22743 static machine_mode
22744 rs6000_c_mode_for_suffix (char suffix)
22746 if (TARGET_FLOAT128_TYPE)
22748 if (suffix == 'q' || suffix == 'Q')
22749 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22751 /* At the moment, we are not defining a suffix for IBM extended double.
22752 If/when the default for -mabi=ieeelongdouble is changed, and we want
22753 to support __ibm128 constants in legacy library code, we may need to
22754 re-evalaute this decision. Currently, c-lex.c only supports 'w' and
22755 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
22756 __float80 constants. */
22759 return VOIDmode;
22762 /* Target hook for invalid_arg_for_unprototyped_fn. */
22763 static const char *
22764 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
22766 return (!rs6000_darwin64_abi
22767 && typelist == 0
22768 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
22769 && (funcdecl == NULL_TREE
22770 || (TREE_CODE (funcdecl) == FUNCTION_DECL
22771 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
22772 ? N_("AltiVec argument passed to unprototyped function")
22773 : NULL;
22776 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
22777 setup by using __stack_chk_fail_local hidden function instead of
22778 calling __stack_chk_fail directly. Otherwise it is better to call
22779 __stack_chk_fail directly. */
22781 static tree ATTRIBUTE_UNUSED
22782 rs6000_stack_protect_fail (void)
22784 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
22785 ? default_hidden_stack_protect_fail ()
22786 : default_external_stack_protect_fail ();
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

#if TARGET_ELF
static unsigned HOST_WIDE_INT
rs6000_asan_shadow_offset (void)
{
  /* Shadow memory lives at 1<<41 for 64-bit and 1<<29 for 32-bit.  */
  int shift = TARGET_64BIT ? 41 : 29;
  return (unsigned HOST_WIDE_INT) 1 << shift;
}
#endif
/* Mask options that we want to support inside of attribute((target)) and
   #pragma GCC target operations.  Note, we do not include things like
   64/32-bit, endianness, hard/soft floating point, etc. that would have
   different calling sequences.  */

struct rs6000_opt_mask {
  const char *name;		/* option name */
  HOST_WIDE_INT mask;		/* mask to set */
  bool invert;			/* invert sense of mask */
  bool valid_target;		/* option is a target option */
};

/* Entries with valid_target == false can be printed by the -mdebug=target
   machinery but are rejected inside attribute((target)) / #pragma GCC
   target.  */
static struct rs6000_opt_mask const rs6000_opt_masks[] =
{
  { "altivec",			OPTION_MASK_ALTIVEC,		false, true  },
  { "cmpb",			OPTION_MASK_CMPB,		false, true  },
  { "crypto",			OPTION_MASK_CRYPTO,		false, true  },
  { "direct-move",		OPTION_MASK_DIRECT_MOVE,	false, true  },
  { "dlmzb",			OPTION_MASK_DLMZB,		false, true  },
  { "efficient-unaligned-vsx",	OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
								false, true  },
  { "float128",			OPTION_MASK_FLOAT128_KEYWORD,	false, true  },
  { "float128-hardware",	OPTION_MASK_FLOAT128_HW,	false, true  },
  { "fprnd",			OPTION_MASK_FPRND,		false, true  },
  { "future",			OPTION_MASK_FUTURE,		false, true  },
  { "hard-dfp",			OPTION_MASK_DFP,		false, true  },
  { "htm",			OPTION_MASK_HTM,		false, true  },
  { "isel",			OPTION_MASK_ISEL,		false, true  },
  { "mfcrf",			OPTION_MASK_MFCRF,		false, true  },
  { "mfpgpr",			0,				false, true  },
  { "modulo",			OPTION_MASK_MODULO,		false, true  },
  { "mulhw",			OPTION_MASK_MULHW,		false, true  },
  { "multiple",			OPTION_MASK_MULTIPLE,		false, true  },
  { "pcrel",			OPTION_MASK_PCREL,		false, true  },
  { "popcntb",			OPTION_MASK_POPCNTB,		false, true  },
  { "popcntd",			OPTION_MASK_POPCNTD,		false, true  },
  { "power8-fusion",		OPTION_MASK_P8_FUSION,		false, true  },
  { "power8-fusion-sign",	OPTION_MASK_P8_FUSION_SIGN,	false, true  },
  { "power8-vector",		OPTION_MASK_P8_VECTOR,		false, true  },
  { "power9-minmax",		OPTION_MASK_P9_MINMAX,		false, true  },
  { "power9-misc",		OPTION_MASK_P9_MISC,		false, true  },
  { "power9-vector",		OPTION_MASK_P9_VECTOR,		false, true  },
  { "powerpc-gfxopt",		OPTION_MASK_PPC_GFXOPT,		false, true  },
  { "powerpc-gpopt",		OPTION_MASK_PPC_GPOPT,		false, true  },
  { "prefixed-addr",		OPTION_MASK_PREFIXED_ADDR,	false, true  },
  { "quad-memory",		OPTION_MASK_QUAD_MEMORY,	false, true  },
  { "quad-memory-atomic",	OPTION_MASK_QUAD_MEMORY_ATOMIC,	false, true  },
  { "recip-precision",		OPTION_MASK_RECIP_PRECISION,	false, true  },
  { "save-toc-indirect",	OPTION_MASK_SAVE_TOC_INDIRECT,	false, true  },
  { "string",			0,				false, true  },
  { "update",			OPTION_MASK_NO_UPDATE,		true , true  },
  { "vsx",			OPTION_MASK_VSX,		false, true  },
#ifdef OPTION_MASK_64BIT
#if TARGET_AIX_OS
  { "aix64",			OPTION_MASK_64BIT,		false, false },
  { "aix32",			OPTION_MASK_64BIT,		true,  false },
#else
  { "64",			OPTION_MASK_64BIT,		false, false },
  { "32",			OPTION_MASK_64BIT,		true,  false },
#endif
#endif
#ifdef OPTION_MASK_EABI
  { "eabi",			OPTION_MASK_EABI,		false, false },
#endif
#ifdef OPTION_MASK_LITTLE_ENDIAN
  { "little",			OPTION_MASK_LITTLE_ENDIAN,	false, false },
  { "big",			OPTION_MASK_LITTLE_ENDIAN,	true,  false },
#endif
#ifdef OPTION_MASK_RELOCATABLE
  { "relocatable",		OPTION_MASK_RELOCATABLE,	false, false },
#endif
#ifdef OPTION_MASK_STRICT_ALIGN
  { "strict-align",		OPTION_MASK_STRICT_ALIGN,	false, false },
#endif
  { "soft-float",		OPTION_MASK_SOFT_FLOAT,		false, false },
  /* NOTE(review): "string" also appears above with valid_target == true;
     only the first match is used by the lookup loops — confirm the
     duplicate is intentional (historical -mstring removal).  */
  { "string",			0,				false, false },
};
/* Builtin mask mapping for printing the flags.  Unlike rs6000_opt_masks,
   these are RS6000_BTM_* builtin masks, used only for -mdebug output.  */
static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
{
  { "altivec",		 RS6000_BTM_ALTIVEC,	false, false },
  { "vsx",		 RS6000_BTM_VSX,	false, false },
  { "fre",		 RS6000_BTM_FRE,	false, false },
  { "fres",		 RS6000_BTM_FRES,	false, false },
  { "frsqrte",		 RS6000_BTM_FRSQRTE,	false, false },
  { "frsqrtes",		 RS6000_BTM_FRSQRTES,	false, false },
  { "popcntd",		 RS6000_BTM_POPCNTD,	false, false },
  { "cell",		 RS6000_BTM_CELL,	false, false },
  { "power8-vector",	 RS6000_BTM_P8_VECTOR,	false, false },
  { "power9-vector",	 RS6000_BTM_P9_VECTOR,	false, false },
  { "power9-misc",	 RS6000_BTM_P9_MISC,	false, false },
  { "crypto",		 RS6000_BTM_CRYPTO,	false, false },
  { "htm",		 RS6000_BTM_HTM,	false, false },
  { "hard-dfp",		 RS6000_BTM_DFP,	false, false },
  { "hard-float",	 RS6000_BTM_HARD_FLOAT,	false, false },
  { "long-double-128",	 RS6000_BTM_LDBL128,	false, false },
  { "powerpc64",	 RS6000_BTM_POWERPC64,	false, false },
  { "float128",		 RS6000_BTM_FLOAT128,	false, false },
  { "float128-hw",	 RS6000_BTM_FLOAT128_HW, false, false },
};
/* Option variables that we want to support inside attribute((target)) and
   #pragma GCC target operations.  These are boolean option variables (not
   ISA mask bits); they are toggled by poking the int at GLOBAL_OFFSET
   inside global_options.  */

struct rs6000_opt_var {
  const char *name;		/* option name */
  size_t global_offset;		/* offset of the option in global_options.  */
  size_t target_offset;		/* offset of the option in target options.  */
};

static struct rs6000_opt_var const rs6000_opt_vars[] =
{
  { "friz",
    offsetof (struct gcc_options, x_TARGET_FRIZ),
    offsetof (struct cl_target_option, x_TARGET_FRIZ), },
  { "avoid-indexed-addresses",
    offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
    offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
  { "longcall",
    offsetof (struct gcc_options, x_rs6000_default_long_calls),
    offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
  { "optimize-swaps",
    offsetof (struct gcc_options, x_rs6000_optimize_swaps),
    offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
  { "allow-movmisalign",
    offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
    offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
  { "sched-groups",
    offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
    offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
  { "always-hint",
    offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
    offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
  { "align-branch-targets",
    offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
    offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
  { "sched-prolog",
    offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
    offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
  /* NOTE(review): "sched-epilog" deliberately maps to the same
     x_TARGET_SCHED_PROLOG variable as "sched-prolog" — the two options
     share one flag; confirm against rs6000.opt.  */
  { "sched-epilog",
    offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
    offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
  { "speculate-indirect-jumps",
    offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
    offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
};
/* Inner function to handle attribute((target("..."))) and #pragma GCC target
   parsing.  Return true if there were no errors.

   ARGS is either a STRING_CST holding a comma-separated option list, or a
   TREE_LIST of such strings (processed recursively).  ATTR_P selects the
   wording of diagnostics (attribute vs. pragma).  */

static bool
rs6000_inner_target_options (tree args, bool attr_p)
{
  bool ret = true;

  if (args == NULL_TREE)
    ;

  else if (TREE_CODE (args) == STRING_CST)
    {
      char *p = ASTRDUP (TREE_STRING_POINTER (args));
      char *q;

      /* Tokenize the copied string on commas; strtok keeps state, so P is
	 nulled after the first call.  */
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool error_p = false;
	  bool not_valid_p = false;
	  const char *cpu_opt = NULL;

	  p = NULL;
	  if (strncmp (q, "cpu=", 4) == 0)
	    {
	      int cpu_index = rs6000_cpu_name_lookup (q+4);
	      if (cpu_index >= 0)
		rs6000_cpu_index = cpu_index;
	      else
		{
		  error_p = true;
		  cpu_opt = q+4;
		}
	    }
	  else if (strncmp (q, "tune=", 5) == 0)
	    {
	      int tune_index = rs6000_cpu_name_lookup (q+5);
	      if (tune_index >= 0)
		rs6000_tune_index = tune_index;
	      else
		{
		  error_p = true;
		  cpu_opt = q+5;
		}
	    }
	  else
	    {
	      /* Not cpu=/tune=: look the token up first in the ISA mask
		 table, then in the boolean option-variable table.  */
	      size_t i;
	      bool invert = false;
	      char *r = q;

	      error_p = true;
	      if (strncmp (r, "no-", 3) == 0)
		{
		  invert = true;
		  r += 3;
		}

	      for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
		if (strcmp (r, rs6000_opt_masks[i].name) == 0)
		  {
		    HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;

		    if (!rs6000_opt_masks[i].valid_target)
		      not_valid_p = true;
		    else
		      {
			error_p = false;
			rs6000_isa_flags_explicit |= mask;

			/* VSX needs altivec, so -mvsx automagically sets
			   altivec and disables -mavoid-indexed-addresses.  */
			if (!invert)
			  {
			    if (mask == OPTION_MASK_VSX)
			      {
				mask |= OPTION_MASK_ALTIVEC;
				TARGET_AVOID_XFORM = 0;
			      }
			  }

			if (rs6000_opt_masks[i].invert)
			  invert = !invert;

			if (invert)
			  rs6000_isa_flags &= ~mask;
			else
			  rs6000_isa_flags |= mask;
		      }
		    break;
		  }

	      if (error_p && !not_valid_p)
		{
		  for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
		    if (strcmp (r, rs6000_opt_vars[i].name) == 0)
		      {
			/* Poke the boolean directly into global_options at
			   its recorded offset.  */
			size_t j = rs6000_opt_vars[i].global_offset;
			*((int *) ((char *)&global_options + j)) = !invert;
			error_p = false;
			not_valid_p = false;
			break;
		      }
		}
	    }

	  if (error_p)
	    {
	      const char *eprefix, *esuffix;

	      ret = false;
	      if (attr_p)
		{
		  eprefix = "__attribute__((__target__(";
		  esuffix = ")))";
		}
	      else
		{
		  eprefix = "#pragma GCC target ";
		  esuffix = "";
		}

	      if (cpu_opt)
		error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
		       q, esuffix);
	      else if (not_valid_p)
		error ("%s%qs%s is not allowed", eprefix, q, esuffix);
	      else
		error ("%s%qs%s is invalid", eprefix, q, esuffix);
	    }
	}
    }

  else if (TREE_CODE (args) == TREE_LIST)
    {
      /* Recurse over each element of the list; a single failure makes the
	 whole parse fail, but we keep going to report all errors.  */
      do
	{
	  tree value = TREE_VALUE (args);
	  if (value)
	    {
	      bool ret2 = rs6000_inner_target_options (value, attr_p);
	      if (!ret2)
		ret = false;
	    }
	  args = TREE_CHAIN (args);
	}
      while (args != NULL_TREE);
    }

  else
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  return ret;
}
/* Print out the target options as a list for -mdebug=target.  ARGS is the
   attribute/pragma argument (NULL, STRING_CST, or TREE_LIST); PREFIX is
   emitted before the first item, then replaced by ", ".  Output goes to
   stderr.  */
static void
rs6000_debug_target_options (tree args, const char *prefix)
{
  if (args == NULL_TREE)
    fprintf (stderr, "%s<NULL>", prefix);

  else if (TREE_CODE (args) == STRING_CST)
    {
      char *p = ASTRDUP (TREE_STRING_POINTER (args));
      char *q;

      /* Print each comma-separated token quoted.  */
      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  fprintf (stderr, "%s\"%s\"", prefix, q);
	  prefix = ", ";
	}
    }

  else if (TREE_CODE (args) == TREE_LIST)
    {
      do
	{
	  tree value = TREE_VALUE (args);
	  if (value)
	    {
	      rs6000_debug_target_options (value, prefix);
	      prefix = ", ";
	    }
	  args = TREE_CHAIN (args);
	}
      while (args != NULL_TREE);
    }

  else
    gcc_unreachable ();

  return;
}
/* Hook to validate attribute((target("...")))  on FNDECL.  Parses ARGS,
   re-runs the option override machinery, records the resulting target (and
   possibly optimization) nodes on FNDECL, then restores the caller's global
   options.  Returns true on success.  */

static bool
rs6000_valid_attribute_p (tree fndecl,
			  tree ARG_UNUSED (name),
			  tree args,
			  int flags)
{
  struct cl_target_option cur_target;
  bool ret;
  tree old_optimize;
  tree new_target, new_optimize;
  tree func_optimize;

  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  if (TARGET_DEBUG_TARGET)
    {
      tree tname = DECL_NAME (fndecl);
      fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
      if (tname)
	fprintf (stderr, "function: %.*s\n",
		 (int) IDENTIFIER_LENGTH (tname),
		 IDENTIFIER_POINTER (tname));
      else
	fprintf (stderr, "function: unknown\n");

      fprintf (stderr, "args:");
      rs6000_debug_target_options (args, " ");
      fprintf (stderr, "\n");

      if (flags)
	fprintf (stderr, "flags: 0x%x\n", flags);

      fprintf (stderr, "--------------------\n");
    }

  /* attribute((target("default"))) does nothing, beyond
     affecting multi-versioning.  */
  if (TREE_VALUE (args)
      && TREE_CODE (TREE_VALUE (args)) == STRING_CST
      && TREE_CHAIN (args) == NULL_TREE
      && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
    return true;

  old_optimize = build_optimization_node (&global_options);
  func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (func_optimize));

  /* The target attributes may also change some optimization flags, so update
     the optimization options if necessary.  */
  cl_target_option_save (&cur_target, &global_options);
  rs6000_cpu_index = rs6000_tune_index = -1;
  ret = rs6000_inner_target_options (args, true);

  /* Set up any additional state.  */
  if (ret)
    {
      ret = rs6000_option_override_internal (false);
      new_target = build_target_option_node (&global_options);
    }
  else
    new_target = NULL;

  new_optimize = build_optimization_node (&global_options);

  if (!new_target)
    ret = false;

  else if (fndecl)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  /* Put the caller's global options back the way we found them.  */
  cl_target_option_restore (&global_options, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (old_optimize));

  return ret;
}
/* Hook to validate the current #pragma GCC target and set the state, and
   update the macros based on what was changed.  If ARGS is NULL, then
   POP_TARGET is used to reset the options.  Returns true on success.  */

bool
rs6000_pragma_target_parse (tree args, tree pop_target)
{
  tree prev_tree = build_target_option_node (&global_options);
  tree cur_tree;
  struct cl_target_option *prev_opt, *cur_opt;
  HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
  HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;

  if (TARGET_DEBUG_TARGET)
    {
      fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
      fprintf (stderr, "args:");
      rs6000_debug_target_options (args, " ");
      fprintf (stderr, "\n");

      if (pop_target)
	{
	  fprintf (stderr, "pop_target:\n");
	  debug_tree (pop_target);
	}
      else
	fprintf (stderr, "pop_target: <NULL>\n");

      fprintf (stderr, "--------------------\n");
    }

  if (! args)
    {
      /* #pragma GCC pop_options (or reset): restore either the popped
	 state or the command-line default.  */
      cur_tree = ((pop_target)
		  ? pop_target
		  : target_option_default_node);
      cl_target_option_restore (&global_options,
				TREE_TARGET_OPTION (cur_tree));
    }
  else
    {
      rs6000_cpu_index = rs6000_tune_index = -1;
      if (!rs6000_inner_target_options (args, false)
	  || !rs6000_option_override_internal (false)
	  || (cur_tree = build_target_option_node (&global_options))
	     == NULL_TREE)
	{
	  if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
	    fprintf (stderr, "invalid pragma\n");

	  return false;
	}
    }

  target_option_current_node = cur_tree;
  rs6000_activate_target_options (target_option_current_node);

  /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
     change the macros that are defined.  */
  if (rs6000_target_modify_macros_ptr)
    {
      prev_opt    = TREE_TARGET_OPTION (prev_tree);
      prev_bumask = prev_opt->x_rs6000_builtin_mask;
      prev_flags  = prev_opt->x_rs6000_isa_flags;

      cur_opt     = TREE_TARGET_OPTION (cur_tree);
      cur_flags   = cur_opt->x_rs6000_isa_flags;
      cur_bumask  = cur_opt->x_rs6000_builtin_mask;

      /* Only touch macros for bits that actually changed.  */
      diff_bumask = (prev_bumask ^ cur_bumask);
      diff_flags  = (prev_flags ^ cur_flags);

      if ((diff_flags != 0) || (diff_bumask != 0))
	{
	  /* Delete old macros.  */
	  rs6000_target_modify_macros_ptr (false,
					   prev_flags & diff_flags,
					   prev_bumask & diff_bumask);

	  /* Define new macros.  */
	  rs6000_target_modify_macros_ptr (true,
					   cur_flags & diff_flags,
					   cur_bumask & diff_bumask);
	}
    }

  return true;
}
/* Remember the last target of rs6000_set_current_function, so repeated
   calls for the same function are cheap no-ops.  */
static GTY(()) tree rs6000_previous_fndecl;
/* Restore target's globals from NEW_TREE and invalidate the
   rs6000_previous_fndecl cache.  */

void
rs6000_activate_target_options (tree new_tree)
{
  cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
  /* Reuse cached globals if the node already has them; the default node
     uses the preallocated default set; otherwise build and cache a new
     set on the node.  */
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
  /* The cached fndecl no longer matches the active options.  */
  rs6000_previous_fndecl = NULL_TREE;
}
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
rs6000_set_current_function (tree fndecl)
{
  if (TARGET_DEBUG_TARGET)
    {
      fprintf (stderr, "\n==================== rs6000_set_current_function");

      if (fndecl)
	fprintf (stderr, ", fndecl %s (%p)",
		 (DECL_NAME (fndecl)
		  ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
		  : "<unknown>"), (void *)fndecl);

      if (rs6000_previous_fndecl)
	fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);

      fprintf (stderr, "\n");
    }

  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl == rs6000_previous_fndecl)
    return;

  /* Determine which target node was active before this call.  */
  tree old_tree;
  if (rs6000_previous_fndecl == NULL_TREE)
    old_tree = target_option_current_node;
  else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
    old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
  else
    old_tree = target_option_default_node;

  /* Determine which target node should become active.  */
  tree new_tree;
  if (fndecl == NULL_TREE)
    {
      if (old_tree != target_option_current_node)
	new_tree = target_option_current_node;
      else
	new_tree = NULL_TREE;
    }
  else
    {
      new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
      if (new_tree == NULL_TREE)
	new_tree = target_option_default_node;
    }

  if (TARGET_DEBUG_TARGET)
    {
      if (new_tree)
	{
	  fprintf (stderr, "\nnew fndecl target specific options:\n");
	  debug_tree (new_tree);
	}

      if (old_tree)
	{
	  fprintf (stderr, "\nold fndecl target specific options:\n");
	  debug_tree (old_tree);
	}

      if (old_tree != NULL_TREE || new_tree != NULL_TREE)
	fprintf (stderr, "--------------------\n");
    }

  /* Only swap globals when something actually changed.  */
  if (new_tree && old_tree != new_tree)
    rs6000_activate_target_options (new_tree);

  if (fndecl)
    rs6000_previous_fndecl = fndecl;
}
23426 /* Save the current options */
23428 static void
23429 rs6000_function_specific_save (struct cl_target_option *ptr,
23430 struct gcc_options *opts)
23432 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
23433 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
23436 /* Restore the current options */
23438 static void
23439 rs6000_function_specific_restore (struct gcc_options *opts,
23440 struct cl_target_option *ptr)
23443 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
23444 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
23445 (void) rs6000_option_override_internal (false);
/* Print the current options saved in PTR to FILE, indented by INDENT.
   Emits one line for the effective ISA flags and one for the flags the
   user set explicitly.  */

static void
rs6000_function_specific_print (FILE *file, int indent,
				struct cl_target_option *ptr)
{
  rs6000_print_isa_options (file, indent, "Isa options set",
			    ptr->x_rs6000_isa_flags);

  rs6000_print_isa_options (file, indent, "Isa options explicit",
			    ptr->x_rs6000_isa_flags_explicit);
}
23461 /* Helper function to print the current isa or misc options on a line. */
23463 static void
23464 rs6000_print_options_internal (FILE *file,
23465 int indent,
23466 const char *string,
23467 HOST_WIDE_INT flags,
23468 const char *prefix,
23469 const struct rs6000_opt_mask *opts,
23470 size_t num_elements)
23472 size_t i;
23473 size_t start_column = 0;
23474 size_t cur_column;
23475 size_t max_column = 120;
23476 size_t prefix_len = strlen (prefix);
23477 size_t comma_len = 0;
23478 const char *comma = "";
23480 if (indent)
23481 start_column += fprintf (file, "%*s", indent, "");
23483 if (!flags)
23485 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
23486 return;
23489 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
23491 /* Print the various mask options. */
23492 cur_column = start_column;
23493 for (i = 0; i < num_elements; i++)
23495 bool invert = opts[i].invert;
23496 const char *name = opts[i].name;
23497 const char *no_str = "";
23498 HOST_WIDE_INT mask = opts[i].mask;
23499 size_t len = comma_len + prefix_len + strlen (name);
23501 if (!invert)
23503 if ((flags & mask) == 0)
23505 no_str = "no-";
23506 len += sizeof ("no-") - 1;
23509 flags &= ~mask;
23512 else
23514 if ((flags & mask) != 0)
23516 no_str = "no-";
23517 len += sizeof ("no-") - 1;
23520 flags |= mask;
23523 cur_column += len;
23524 if (cur_column > max_column)
23526 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
23527 cur_column = start_column + len;
23528 comma = "";
23531 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
23532 comma = ", ";
23533 comma_len = sizeof (", ") - 1;
23536 fputs ("\n", file);
23539 /* Helper function to print the current isa options on a line. */
23541 static void
23542 rs6000_print_isa_options (FILE *file, int indent, const char *string,
23543 HOST_WIDE_INT flags)
23545 rs6000_print_options_internal (file, indent, string, flags, "-m",
23546 &rs6000_opt_masks[0],
23547 ARRAY_SIZE (rs6000_opt_masks));
23550 static void
23551 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
23552 HOST_WIDE_INT flags)
23554 rs6000_print_options_internal (file, indent, string, flags, "",
23555 &rs6000_builtin_mask_names[0],
23556 ARRAY_SIZE (rs6000_builtin_mask_names));
/* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
   2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
   -mupper-regs-df, etc.).

   If the user used -mno-power8-vector, we need to turn off all of the implicit
   ISA 2.07 and 3.0 options that relate to the vector unit.

   If the user used -mno-power9-vector, we need to turn off all of the implicit
   ISA 3.0 options that relate to the vector unit.

   This function does not handle explicit options such as the user specifying
   -mdirect-move.  These are handled in rs6000_option_override_internal, and
   the appropriate error is given if needed.

   We return a mask of all of the implicit options that should not be enabled
   by default.  */

static HOST_WIDE_INT
rs6000_disable_incompatible_switches (void)
{
  HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
  size_t i, j;

  /* Table of "-mno-X" switches and the dependent-feature masks each one
     drags down with it, ordered from newest ISA to oldest.  */
  static const struct {
    const HOST_WIDE_INT no_flag;	/* flag explicitly turned off.  */
    const HOST_WIDE_INT dep_flags;	/* flags that depend on this option.  */
    const char *const name;		/* name of the switch.  */
  } flags[] = {
    { OPTION_MASK_FUTURE,	OTHER_FUTURE_MASKS,	"future"	},
    { OPTION_MASK_P9_VECTOR,	OTHER_P9_VECTOR_MASKS,	"power9-vector"	},
    { OPTION_MASK_P8_VECTOR,	OTHER_P8_VECTOR_MASKS,	"power8-vector"	},
    { OPTION_MASK_VSX,		OTHER_VSX_VECTOR_MASKS,	"vsx"		},
  };

  for (i = 0; i < ARRAY_SIZE (flags); i++)
    {
      HOST_WIDE_INT no_flag = flags[i].no_flag;

      /* The switch was explicitly negated on the command line.  */
      if ((rs6000_isa_flags & no_flag) == 0
	  && (rs6000_isa_flags_explicit & no_flag) != 0)
	{
	  HOST_WIDE_INT dep_flags = flags[i].dep_flags;
	  /* Dependent features the user ALSO explicitly enabled — that is
	     a conflict worth diagnosing.  */
	  HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
				     & rs6000_isa_flags
				     & dep_flags);

	  if (set_flags)
	    {
	      for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
		if ((set_flags & rs6000_opt_masks[j].mask) != 0)
		  {
		    set_flags &= ~rs6000_opt_masks[j].mask;
		    error ("%<-mno-%s%> turns off %<-m%s%>",
			   flags[i].name,
			   rs6000_opt_masks[j].name);
		  }

	      /* Every conflicting bit must have a named option.  */
	      gcc_assert (!set_flags);
	    }

	  /* Turn off the dependent features and make sure the defaults
	     machinery does not re-enable them.  */
	  rs6000_isa_flags &= ~dep_flags;
	  ignore_masks |= no_flag | dep_flags;
	}
    }

  return ignore_masks;
}
23628 /* Helper function for printing the function name when debugging. */
23630 static const char *
23631 get_decl_name (tree fn)
23633 tree name;
23635 if (!fn)
23636 return "<null>";
23638 name = DECL_NAME (fn);
23639 if (!name)
23640 return "<no-name>";
23642 return IDENTIFIER_POINTER (name);
/* Return the clone id of the target we are compiling code for in a target
   clone.  The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
   the priority list for the target clones (ordered from lowest to
   highest).  */

static int
rs6000_clone_priority (tree fndecl)
{
  tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
  HOST_WIDE_INT isa_masks;
  int ret = CLONE_DEFAULT;
  tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
  const char *attrs_str = NULL;

  /* NOTE(review): assumes FNDECL always carries a "target" attribute with
     a string value — confirm callers guarantee this.  */
  attrs = TREE_VALUE (TREE_VALUE (attrs));
  attrs_str = TREE_STRING_POINTER (attrs);

  /* Return priority zero for default function.  Return the ISA needed for the
     function if it is not the default.  */
  if (strcmp (attrs_str, "default") != 0)
    {
      if (fn_opts == NULL_TREE)
	fn_opts = target_option_default_node;

      if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
	isa_masks = rs6000_isa_flags;
      else
	isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;

      /* Scan from highest priority down, stopping at the first clone whose
	 ISA requirement is satisfied.  */
      for (ret = CLONE_MAX - 1; ret != 0; ret--)
	if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
	  break;
    }

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
	     get_decl_name (fndecl), ret);

  return ret;
}
23686 /* This compares the priority of target features in function DECL1 and DECL2.
23687 It returns positive value if DECL1 is higher priority, negative value if
23688 DECL2 is higher priority and 0 if they are the same. Note, priorities are
23689 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
23691 static int
23692 rs6000_compare_version_priority (tree decl1, tree decl2)
23694 int priority1 = rs6000_clone_priority (decl1);
23695 int priority2 = rs6000_clone_priority (decl2);
23696 int ret = priority1 - priority2;
23698 if (TARGET_DEBUG_TARGET)
23699 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
23700 get_decl_name (decl1), get_decl_name (decl2), ret);
23702 return ret;
/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Returns the decl of the dispatcher function, or
   NULL if no default (unannotated) version of DECL exists.  */
23709 static tree
23710 rs6000_get_function_versions_dispatcher (void *decl)
23712 tree fn = (tree) decl;
23713 struct cgraph_node *node = NULL;
23714 struct cgraph_node *default_node = NULL;
23715 struct cgraph_function_version_info *node_v = NULL;
23716 struct cgraph_function_version_info *first_v = NULL;
23718 tree dispatch_decl = NULL;
23720 struct cgraph_function_version_info *default_version_info = NULL;
23721 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
23723 if (TARGET_DEBUG_TARGET)
23724 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
23725 get_decl_name (fn));
23727 node = cgraph_node::get (fn);
23728 gcc_assert (node != NULL);
23730 node_v = node->function_version ();
23731 gcc_assert (node_v != NULL);
/* If a dispatcher was already built for this version set, reuse it.  */
23733 if (node_v->dispatcher_resolver != NULL)
23734 return node_v->dispatcher_resolver;
23736 /* Find the default version and make it the first node.  */
23737 first_v = node_v;
23738 /* Go to the beginning of the chain.  */
23739 while (first_v->prev != NULL)
23740 first_v = first_v->prev;
/* Walk forward through the version chain looking for the default
   (unannotated) version.  */
23742 default_version_info = first_v;
23743 while (default_version_info != NULL)
23745 const tree decl2 = default_version_info->this_node->decl;
23746 if (is_function_default_version (decl2))
23747 break;
23748 default_version_info = default_version_info->next;
23751 /* If there is no default node, just return NULL.  */
23752 if (default_version_info == NULL)
23753 return NULL;
23755 /* Make default info the first node: splice it out of its current
   position and relink it at the head of the chain.  */
23756 if (first_v != default_version_info)
23758 default_version_info->prev->next = default_version_info->next;
23759 if (default_version_info->next)
23760 default_version_info->next->prev = default_version_info->prev;
23761 first_v->prev = default_version_info;
23762 default_version_info->next = first_v;
23763 default_version_info->prev = NULL;
23766 default_node = default_version_info->this_node;
/* __builtin_cpu_supports needs the hwcap bits GLIBC exports in the TCB;
   without them, multiversion dispatch cannot be implemented.  */
23768 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
23769 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23770 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
23771 "exports hardware capability bits");
23772 #else
23774 if (targetm.has_ifunc_p ())
23776 struct cgraph_function_version_info *it_v = NULL;
23777 struct cgraph_node *dispatcher_node = NULL;
23778 struct cgraph_function_version_info *dispatcher_version_info = NULL;
23780 /* Right now, the dispatching is done via ifunc.  */
23781 dispatch_decl = make_dispatcher_decl (default_node->decl);
23783 dispatcher_node = cgraph_node::get_create (dispatch_decl);
23784 gcc_assert (dispatcher_node != NULL);
23785 dispatcher_node->dispatcher_function = 1;
23786 dispatcher_version_info
23787 = dispatcher_node->insert_new_function_version ();
23788 dispatcher_version_info->next = default_version_info;
23789 dispatcher_node->definition = 1;
23791 /* Set the dispatcher for all the versions.  */
23792 it_v = default_version_info;
23793 while (it_v != NULL)
23795 it_v->dispatcher_resolver = dispatch_decl;
23796 it_v = it_v->next;
23799 else
23801 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23802 "multiversioning needs ifunc which is not supported "
23803 "on this target");
23805 #endif
23807 return dispatch_decl;
23810 /* Make the resolver function decl to dispatch the versions of a multi-
23811 versioned function, DEFAULT_DECL.  Create an empty basic block in the
23812 resolver and store the pointer in EMPTY_BB.  Return the decl of the resolver
23813 function.  DISPATCH_DECL is additionally marked as an "ifunc" whose
   resolver is the new function.  */
23815 static tree
23816 make_resolver_func (const tree default_decl,
23817 const tree dispatch_decl,
23818 basic_block *empty_bb)
23820 /* Make the resolver function static.  The resolver function returns
23821 void *.  */
23822 tree decl_name = clone_function_name (default_decl, "resolver");
23823 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
23824 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
23825 tree decl = build_fn_decl (resolver_name, type);
23826 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
/* The resolver is compiler-generated: keep it local, never inline it.  */
23828 DECL_NAME (decl) = decl_name;
23829 TREE_USED (decl) = 1;
23830 DECL_ARTIFICIAL (decl) = 1;
23831 DECL_IGNORED_P (decl) = 0;
23832 TREE_PUBLIC (decl) = 0;
23833 DECL_UNINLINABLE (decl) = 1;
23835 /* Resolver is not external, body is generated.  */
23836 DECL_EXTERNAL (decl) = 0;
23837 DECL_EXTERNAL (dispatch_decl) = 0;
23839 DECL_CONTEXT (decl) = NULL_TREE;
23840 DECL_INITIAL (decl) = make_node (BLOCK);
23841 DECL_STATIC_CONSTRUCTOR (decl) = 0;
23843 /* Build result decl (of type void *) and add to function_decl.  */
23844 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
23845 DECL_CONTEXT (t) = decl;
23846 DECL_ARTIFICIAL (t) = 1;
23847 DECL_IGNORED_P (t) = 1;
23848 DECL_RESULT (decl) = t;
/* Lower the new function and give it an empty body; the caller fills the
   dispatch code into *EMPTY_BB later.  */
23850 gimplify_function_tree (decl);
23851 push_cfun (DECL_STRUCT_FUNCTION (decl));
23852 *empty_bb = init_lowered_empty_function (decl, false,
23853 profile_count::uninitialized ());
23855 cgraph_node::add_new_function (decl, true);
23856 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
23858 pop_cfun ();
23860 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name.  */
23861 DECL_ATTRIBUTES (dispatch_decl)
23862 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
/* The dispatcher shares the resolver's body via a same-body alias.  */
23864 cgraph_node::create_same_body_alias (dispatch_decl, decl);
23866 return decl;
23869 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
23870 return a pointer to VERSION_DECL if we are running on a machine that
23871 supports the index CLONE_ISA hardware architecture bits.  This function will
23872 be called during version dispatch to decide which function version to
23873 execute.  It returns the basic block at the end, to which more conditions
23874 can be added.  */
23876 static basic_block
23877 add_condition_to_bb (tree function_decl, tree version_decl,
23878 int clone_isa, basic_block new_bb)
23880 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
23882 gcc_assert (new_bb != NULL);
23883 gimple_seq gseq = bb_seq (new_bb);
/* Build "return (void *) &version_decl;" -- used on both the conditional
   and the unconditional (default) paths below.  */
23886 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
23887 build_fold_addr_expr (version_decl));
23888 tree result_var = create_tmp_var (ptr_type_node);
23889 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
23890 gimple *return_stmt = gimple_build_return (result_var);
/* The default clone is returned unconditionally -- no __builtin_cpu_supports
   test is needed, so just emit the return and stop.  */
23892 if (clone_isa == CLONE_DEFAULT)
23894 gimple_seq_add_stmt (&gseq, convert_stmt);
23895 gimple_seq_add_stmt (&gseq, return_stmt);
23896 set_bb_seq (new_bb, gseq);
23897 gimple_set_bb (convert_stmt, new_bb);
23898 gimple_set_bb (return_stmt, new_bb);
23899 pop_cfun ();
23900 return new_bb;
/* Otherwise emit "if (__builtin_cpu_supports ("<isa>")) return <version>;".  */
23903 tree bool_zero = build_int_cst (bool_int_type_node, 0);
23904 tree cond_var = create_tmp_var (bool_int_type_node);
23905 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
23906 const char *arg_str = rs6000_clone_map[clone_isa].name;
23907 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
23908 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
23909 gimple_call_set_lhs (call_cond_stmt, cond_var);
23911 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
23912 gimple_set_bb (call_cond_stmt, new_bb);
23913 gimple_seq_add_stmt (&gseq, call_cond_stmt);
23915 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
23916 NULL_TREE, NULL_TREE);
23917 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
23918 gimple_set_bb (if_else_stmt, new_bb);
23919 gimple_seq_add_stmt (&gseq, if_else_stmt);
23921 gimple_seq_add_stmt (&gseq, convert_stmt);
23922 gimple_seq_add_stmt (&gseq, return_stmt);
23923 set_bb_seq (new_bb, gseq);
/* Split the block into bb1 (the test), bb2 (the taken-path return) and
   bb3 (the fall-through, where the caller chains the next condition).  */
23925 basic_block bb1 = new_bb;
23926 edge e12 = split_block (bb1, if_else_stmt);
23927 basic_block bb2 = e12->dest;
23928 e12->flags &= ~EDGE_FALLTHRU;
23929 e12->flags |= EDGE_TRUE_VALUE;
23931 edge e23 = split_block (bb2, return_stmt);
23932 gimple_set_bb (convert_stmt, bb2);
23933 gimple_set_bb (return_stmt, bb2);
23935 basic_block bb3 = e23->dest;
23936 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
/* bb2 ends in a return, so it goes straight to the exit block.  */
23938 remove_edge (e23);
23939 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
23941 pop_cfun ();
23942 return bb3;
23945 /* This function generates the dispatch function for multi-versioned functions.
23946 DISPATCH_DECL is the function which will contain the dispatch logic.
23947 FNDECLS are the function choices for dispatch, and is a tree chain.
23948 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
23949 code is generated. */
23951 static int
23952 dispatch_function_versions (tree dispatch_decl,
23953 void *fndecls_p,
23954 basic_block *empty_bb)
23956 int ix;
23957 tree ele;
23958 vec<tree> *fndecls;
23959 tree clones[CLONE_MAX];
23961 if (TARGET_DEBUG_TARGET)
23962 fputs ("dispatch_function_versions, top\n", stderr);
23964 gcc_assert (dispatch_decl != NULL
23965 && fndecls_p != NULL
23966 && empty_bb != NULL);
23968 /* fndecls_p is actually a vector. */
23969 fndecls = static_cast<vec<tree> *> (fndecls_p);
23971 /* At least one more version other than the default. */
23972 gcc_assert (fndecls->length () >= 2);
23974 /* The first version in the vector is the default decl. */
23975 memset ((void *) clones, '\0', sizeof (clones));
23976 clones[CLONE_DEFAULT] = (*fndecls)[0];
23978 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
23979 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
23980 __builtin_cpu_support ensures that the TOC fields are setup by requiring a
23981 recent glibc. If we ever need to call __builtin_cpu_init, we would need
23982 to insert the code here to do the call. */
23984 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
23986 int priority = rs6000_clone_priority (ele);
23987 if (!clones[priority])
23988 clones[priority] = ele;
23991 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
23992 if (clones[ix])
23994 if (TARGET_DEBUG_TARGET)
23995 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
23996 ix, get_decl_name (clones[ix]));
23998 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
23999 *empty_bb);
24002 return 0;
24005 /* Generate the dispatching code body to dispatch multi-versioned function
24006 DECL. The target hook is called to process the "target" attributes and
24007 provide the code to dispatch the right function at run-time. NODE points
24008 to the dispatcher decl whose body will be created. */
24010 static tree
24011 rs6000_generate_version_dispatcher_body (void *node_p)
24013 tree resolver;
24014 basic_block empty_bb;
24015 struct cgraph_node *node = (cgraph_node *) node_p;
24016 struct cgraph_function_version_info *ninfo = node->function_version ();
24018 if (ninfo->dispatcher_resolver)
24019 return ninfo->dispatcher_resolver;
24021 /* node is going to be an alias, so remove the finalized bit. */
24022 node->definition = false;
24024 /* The first version in the chain corresponds to the default version. */
24025 ninfo->dispatcher_resolver = resolver
24026 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
24028 if (TARGET_DEBUG_TARGET)
24029 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
24030 get_decl_name (resolver));
24032 push_cfun (DECL_STRUCT_FUNCTION (resolver));
24033 auto_vec<tree, 2> fn_ver_vec;
24035 for (struct cgraph_function_version_info *vinfo = ninfo->next;
24036 vinfo;
24037 vinfo = vinfo->next)
24039 struct cgraph_node *version = vinfo->this_node;
24040 /* Check for virtual functions here again, as by this time it should
24041 have been determined if this function needs a vtable index or
24042 not. This happens for methods in derived classes that override
24043 virtual methods in base classes but are not explicitly marked as
24044 virtual. */
24045 if (DECL_VINDEX (version->decl))
24046 sorry ("Virtual function multiversioning not supported");
24048 fn_ver_vec.safe_push (version->decl);
24051 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
24052 cgraph_edge::rebuild_edges ();
24053 pop_cfun ();
24054 return resolver;
24058 /* Hook to determine if one function can safely inline another. */
24060 static bool
24061 rs6000_can_inline_p (tree caller, tree callee)
24063 bool ret = false;
24064 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
24065 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
24067 /* If the callee has no option attributes, then it is ok to inline. */
24068 if (!callee_tree)
24069 ret = true;
24071 else
24073 HOST_WIDE_INT caller_isa;
24074 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24075 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
24076 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
24078 /* If the caller has option attributes, then use them.
24079 Otherwise, use the command line options. */
24080 if (caller_tree)
24081 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
24082 else
24083 caller_isa = rs6000_isa_flags;
24085 /* The callee's options must be a subset of the caller's options, i.e.
24086 a vsx function may inline an altivec function, but a no-vsx function
24087 must not inline a vsx function. However, for those options that the
24088 callee has explicitly enabled or disabled, then we must enforce that
24089 the callee's and caller's options match exactly; see PR70010. */
24090 if (((caller_isa & callee_isa) == callee_isa)
24091 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
24092 ret = true;
24095 if (TARGET_DEBUG_TARGET)
24096 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
24097 get_decl_name (caller), get_decl_name (callee),
24098 (ret ? "can" : "cannot"));
24100 return ret;
24103 /* Allocate a stack temp and fixup the address so it meets the particular
24104 memory requirements (either offetable or REG+REG addressing). */
24107 rs6000_allocate_stack_temp (machine_mode mode,
24108 bool offsettable_p,
24109 bool reg_reg_p)
24111 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
24112 rtx addr = XEXP (stack, 0);
24113 int strict_p = reload_completed;
24115 if (!legitimate_indirect_address_p (addr, strict_p))
24117 if (offsettable_p
24118 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
24119 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24121 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
24122 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24125 return stack;
24128 /* Given a memory reference, if it is not a reg or reg+reg addressing,
24129 convert to such a form to deal with memory reference instructions
24130 like STFIWX and LDBRX that only take reg+reg addressing. */
24133 rs6000_force_indexed_or_indirect_mem (rtx x)
24135 machine_mode mode = GET_MODE (x);
24137 gcc_assert (MEM_P (x));
24138 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
24140 rtx addr = XEXP (x, 0);
24141 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
24143 rtx reg = XEXP (addr, 0);
24144 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
24145 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
24146 gcc_assert (REG_P (reg));
24147 emit_insn (gen_add3_insn (reg, reg, size_rtx));
24148 addr = reg;
24150 else if (GET_CODE (addr) == PRE_MODIFY)
24152 rtx reg = XEXP (addr, 0);
24153 rtx expr = XEXP (addr, 1);
24154 gcc_assert (REG_P (reg));
24155 gcc_assert (GET_CODE (expr) == PLUS);
24156 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
24157 addr = reg;
24160 if (GET_CODE (addr) == PLUS)
24162 rtx op0 = XEXP (addr, 0);
24163 rtx op1 = XEXP (addr, 1);
24164 op0 = force_reg (Pmode, op0);
24165 op1 = force_reg (Pmode, op1);
24166 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
24168 else
24169 x = replace_equiv_address (x, force_reg (Pmode, addr));
24172 return x;
24175 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
24177 On the RS/6000, all integer constants are acceptable, most won't be valid
24178 for particular insns, though. Only easy FP constants are acceptable. */
24180 static bool
24181 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
24183 if (TARGET_ELF && tls_referenced_p (x))
24184 return false;
24186 if (CONST_DOUBLE_P (x))
24187 return easy_fp_constant (x, mode);
24189 if (GET_CODE (x) == CONST_VECTOR)
24190 return easy_vector_constant (x, mode);
24192 return true;
24196 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
24198 static bool
24199 chain_already_loaded (rtx_insn *last)
24201 for (; last != NULL; last = PREV_INSN (last))
24203 if (NONJUMP_INSN_P (last))
24205 rtx patt = PATTERN (last);
24207 if (GET_CODE (patt) == SET)
24209 rtx lhs = XEXP (patt, 0);
24211 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
24212 return true;
24216 return false;
24219 /* Expand code to perform a call under the AIX or ELFv2 ABI.
   VALUE is the place the return value goes (or NULL for a void call),
   FUNC_DESC is the callee (SYMBOL_REF or function-pointer value), TLSARG
   is the __tls_get_addr argument if this is a TLS helper call, and COOKIE
   carries the CALL_* flag bits (e.g. CALL_LONG).  */
24221 void
24222 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24224 rtx func = func_desc;
24225 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
24226 rtx toc_load = NULL_RTX;
24227 rtx toc_restore = NULL_RTX;
24228 rtx func_addr;
24229 rtx abi_reg = NULL_RTX;
24230 rtx call[4];
24231 int n_call;
24232 rtx insn;
24233 bool is_pltseq_longcall;
24235 if (global_tlsarg)
24236 tlsarg = global_tlsarg;
24238 /* Handle longcall attributes.  */
24239 is_pltseq_longcall = false;
24240 if ((INTVAL (cookie) & CALL_LONG) != 0
24241 && GET_CODE (func_desc) == SYMBOL_REF)
24243 func = rs6000_longcall_ref (func_desc, tlsarg);
24244 if (TARGET_PLTSEQ)
24245 is_pltseq_longcall = true;
24248 /* Handle indirect calls.  (Under ABI_AIX a SYMBOL_REF that is not a
   function is a function descriptor, hence also indirect.)  */
24249 if (!SYMBOL_REF_P (func)
24250 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
24252 if (!rs6000_pcrel_p (cfun))
24254 /* Save the TOC into its reserved slot before the call,
24255 and prepare to restore it after the call.  */
24256 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
24257 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
24258 gen_rtvec (1, stack_toc_offset),
24259 UNSPEC_TOCSLOT);
24260 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
24262 /* Can we optimize saving the TOC in the prologue or
24263 do we need to do it at every call?  */
24264 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
24265 cfun->machine->save_toc_in_prologue = true;
24266 else
24268 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24269 rtx stack_toc_mem = gen_frame_mem (Pmode,
24270 gen_rtx_PLUS (Pmode, stack_ptr,
24271 stack_toc_offset));
/* Volatile so the TOC store is not deleted or moved.  */
24272 MEM_VOLATILE_P (stack_toc_mem) = 1;
24273 if (is_pltseq_longcall)
/* Use an UNSPEC_PLTSEQ store so the linker can edit the sequence.  */
24275 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
24276 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24277 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
24279 else
24280 emit_move_insn (stack_toc_mem, toc_reg);
24284 if (DEFAULT_ABI == ABI_ELFv2)
24286 /* A function pointer in the ELFv2 ABI is just a plain address, but
24287 the ABI requires it to be loaded into r12 before the call.  */
24288 func_addr = gen_rtx_REG (Pmode, 12);
24289 if (!rtx_equal_p (func_addr, func))
24290 emit_move_insn (func_addr, func);
24291 abi_reg = func_addr;
24292 /* Indirect calls via CTR are strongly preferred over indirect
24293 calls via LR, so move the address there.  Needed to mark
24294 this insn for linker plt sequence editing too.  */
24295 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24296 if (is_pltseq_longcall)
24298 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
24299 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24300 emit_insn (gen_rtx_SET (func_addr, mark_func));
24301 v = gen_rtvec (2, func_addr, func_desc);
24302 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24304 else
24305 emit_move_insn (func_addr, abi_reg);
24307 else
24309 /* A function pointer under AIX is a pointer to a data area whose
24310 first word contains the actual address of the function, whose
24311 second word contains a pointer to its TOC, and whose third word
24312 contains a value to place in the static chain register (r11).
24313 Note that if we load the static chain, our "trampoline" need
24314 not have any executable code.  */
24316 /* Load up address of the actual function.  */
24317 func = force_reg (Pmode, func);
24318 func_addr = gen_reg_rtx (Pmode);
24319 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
24321 /* Indirect calls via CTR are strongly preferred over indirect
24322 calls via LR, so move the address there.  */
24323 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
24324 emit_move_insn (ctr_reg, func_addr);
24325 func_addr = ctr_reg;
24327 /* Prepare to load the TOC of the called function.  Note that the
24328 TOC load must happen immediately before the actual call so
24329 that unwinding the TOC registers works correctly.  See the
24330 comment in frob_update_context.  */
24331 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
24332 rtx func_toc_mem = gen_rtx_MEM (Pmode,
24333 gen_rtx_PLUS (Pmode, func,
24334 func_toc_offset));
24335 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
24337 /* If we have a static chain, load it up.  But, if the call was
24338 originally direct, the 3rd word has not been written since no
24339 trampoline has been built, so we ought not to load it, lest we
24340 override a static chain value.  */
24341 if (!(GET_CODE (func_desc) == SYMBOL_REF
24342 && SYMBOL_REF_FUNCTION_P (func_desc))
24343 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
24344 && !chain_already_loaded (get_current_sequence ()->next->last))
24346 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
24347 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
24348 rtx func_sc_mem = gen_rtx_MEM (Pmode,
24349 gen_rtx_PLUS (Pmode, func,
24350 func_sc_offset));
24351 emit_move_insn (sc_reg, func_sc_mem);
24352 abi_reg = sc_reg;
24356 else
24358 /* No TOC register needed for calls from PC-relative callers.  */
24359 if (!rs6000_pcrel_p (cfun))
24360 /* Direct calls use the TOC: for local calls, the callee will
24361 assume the TOC register is set; for non-local calls, the
24362 PLT stub needs the TOC register.  */
24363 abi_reg = toc_reg;
24364 func_addr = func;
24367 /* Create the call: the call itself, optionally the TOC load and TOC
   restore, and a clobber of the link register.  */
24368 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24369 if (value != NULL_RTX)
24370 call[0] = gen_rtx_SET (value, call[0]);
24371 n_call = 1;
24373 if (toc_load)
24374 call[n_call++] = toc_load;
24375 if (toc_restore)
24376 call[n_call++] = toc_restore;
24378 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24380 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
24381 insn = emit_call_insn (insn);
24383 /* Mention all registers defined by the ABI to hold information
24384 as uses in CALL_INSN_FUNCTION_USAGE.  */
24385 if (abi_reg)
24386 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24389 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
24391 void
24392 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24394 rtx call[2];
24395 rtx insn;
24397 gcc_assert (INTVAL (cookie) == 0);
24399 if (global_tlsarg)
24400 tlsarg = global_tlsarg;
24402 /* Create the call. */
24403 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), tlsarg);
24404 if (value != NULL_RTX)
24405 call[0] = gen_rtx_SET (value, call[0]);
24407 call[1] = simple_return_rtx;
24409 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
24410 insn = emit_call_insn (insn);
24412 /* Note use of the TOC register. */
24413 if (!rs6000_pcrel_p (cfun))
24414 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
24415 gen_rtx_REG (Pmode, TOC_REGNUM));
24418 /* Expand code to perform a call under the SYSV4 ABI.
   VALUE receives the return value (NULL for void), FUNC_DESC is the
   callee, TLSARG is the __tls_get_addr argument for TLS calls, and
   COOKIE carries the CALL_* flag bits.  */
24420 void
24421 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24423 rtx func = func_desc;
24424 rtx func_addr;
24425 rtx call[4];
24426 rtx insn;
24427 rtx abi_reg = NULL_RTX;
24428 int n;
24430 if (global_tlsarg)
24431 tlsarg = global_tlsarg;
24433 /* Handle longcall attributes.  */
24434 if ((INTVAL (cookie) & CALL_LONG) != 0
24435 && GET_CODE (func_desc) == SYMBOL_REF)
24437 func = rs6000_longcall_ref (func_desc, tlsarg);
24438 /* If the longcall was implemented as an inline PLT call using
24439 PLT unspecs then func will be REG:r11.  If not, func will be
24440 a pseudo reg.  The inline PLT call sequence supports lazy
24441 linking (and longcalls to functions in dlopen'd libraries).
24442 The other style of longcalls don't.  The lazy linking entry
24443 to the dynamic symbol resolver requires r11 be the function
24444 address (as it is for linker generated PLT stubs).  Ensure
24445 r11 stays valid to the bctrl by marking r11 used by the call.  */
24446 if (TARGET_PLTSEQ)
24447 abi_reg = func;
24450 /* Handle indirect calls.  */
24451 if (GET_CODE (func) != SYMBOL_REF)
24453 func = force_reg (Pmode, func);
24455 /* Indirect calls via CTR are strongly preferred over indirect
24456 calls via LR, so move the address there.  That can't be left
24457 to reload because we want to mark every instruction in an
24458 inline PLT call sequence with a reloc, enabling the linker to
24459 edit the sequence back to a direct call when that makes sense.  */
24460 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24461 if (abi_reg)
/* Inline PLT sequence: wrap the moves in UNSPEC_PLTSEQ markers.  */
24463 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24464 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24465 emit_insn (gen_rtx_SET (func_addr, mark_func));
24466 v = gen_rtvec (2, func_addr, func_desc);
24467 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24469 else
24470 emit_move_insn (func_addr, func);
24472 else
24473 func_addr = func;
24475 /* Create the call: the call itself, a USE of the cookie, optionally a
   USE of the PIC register, and a clobber of the link register.  */
24476 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24477 if (value != NULL_RTX)
24478 call[0] = gen_rtx_SET (value, call[0]);
24480 call[1] = gen_rtx_USE (VOIDmode, cookie);
24481 n = 2;
/* Secure-PLT PIC calls to non-local symbols need the GOT pointer live.  */
24482 if (TARGET_SECURE_PLT
24483 && flag_pic
24484 && GET_CODE (func_addr) == SYMBOL_REF
24485 && !SYMBOL_REF_LOCAL_P (func_addr))
24486 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
24488 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24490 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
24491 insn = emit_call_insn (insn);
24492 if (abi_reg)
24493 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24496 /* Expand code to perform a sibling call under the SysV4 ABI. */
24498 void
24499 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24501 rtx func = func_desc;
24502 rtx func_addr;
24503 rtx call[3];
24504 rtx insn;
24505 rtx abi_reg = NULL_RTX;
24507 if (global_tlsarg)
24508 tlsarg = global_tlsarg;
24510 /* Handle longcall attributes. */
24511 if ((INTVAL (cookie) & CALL_LONG) != 0
24512 && GET_CODE (func_desc) == SYMBOL_REF)
24514 func = rs6000_longcall_ref (func_desc, tlsarg);
24515 /* If the longcall was implemented as an inline PLT call using
24516 PLT unspecs then func will be REG:r11. If not, func will be
24517 a pseudo reg. The inline PLT call sequence supports lazy
24518 linking (and longcalls to functions in dlopen'd libraries).
24519 The other style of longcalls don't. The lazy linking entry
24520 to the dynamic symbol resolver requires r11 be the function
24521 address (as it is for linker generated PLT stubs). Ensure
24522 r11 stays valid to the bctr by marking r11 used by the call. */
24523 if (TARGET_PLTSEQ)
24524 abi_reg = func;
24527 /* Handle indirect calls. */
24528 if (GET_CODE (func) != SYMBOL_REF)
24530 func = force_reg (Pmode, func);
24532 /* Indirect sibcalls must go via CTR. That can't be left to
24533 reload because we want to mark every instruction in an inline
24534 PLT call sequence with a reloc, enabling the linker to edit
24535 the sequence back to a direct call when that makes sense. */
24536 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24537 if (abi_reg)
24539 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24540 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24541 emit_insn (gen_rtx_SET (func_addr, mark_func));
24542 v = gen_rtvec (2, func_addr, func_desc);
24543 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24545 else
24546 emit_move_insn (func_addr, func);
24548 else
24549 func_addr = func;
24551 /* Create the call. */
24552 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24553 if (value != NULL_RTX)
24554 call[0] = gen_rtx_SET (value, call[0]);
24556 call[1] = gen_rtx_USE (VOIDmode, cookie);
24557 call[2] = simple_return_rtx;
24559 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
24560 insn = emit_call_insn (insn);
24561 if (abi_reg)
24562 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24565 #if TARGET_MACHO
24567 /* Expand code to perform a call under the Darwin ABI.
24568 Modulo handling of mlongcall, this is much the same as sysv.
24569 if/when the longcall optimisation is removed, we could drop this
24570 code and use the sysv case (taking care to avoid the tls stuff).
24572 We can use this for sibcalls too, if needed.
   SIBCALL selects between a normal call (LR clobber) and a sibcall
   (simple return).  */
24574 void
24575 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
24576 rtx cookie, bool sibcall)
24578 rtx func = func_desc;
24579 rtx func_addr;
24580 rtx call[3];
24581 rtx insn;
24582 int cookie_val = INTVAL (cookie);
24583 bool make_island = false;
24585 /* Handle longcall attributes, there are two cases for Darwin:
24586 1) Newer linkers are capable of synthesising any branch islands needed.
24587 2) We need a helper branch island synthesised by the compiler.
24588 The second case has mostly been retired and we don't use it for m64.
24589 In fact, it's is an optimisation, we could just indirect as sysv does..
24590 ... however, backwards compatibility for now.
24591 If we're going to use this, then we need to keep the CALL_LONG bit set,
24592 so that we can pick up the special insn form later.  */
24593 if ((cookie_val & CALL_LONG) != 0
24594 && GET_CODE (func_desc) == SYMBOL_REF)
24596 /* FIXME: the longcall opt should not hang off this flag, it is most
24597 likely incorrect for kernel-mode code-generation.  */
24598 if (darwin_symbol_stubs && TARGET_32BIT)
24599 make_island = true; /* Do nothing yet, retain the CALL_LONG flag.  */
24600 else
24602 /* The linker is capable of doing this, but the user explicitly
24603 asked for -mlongcall, so we'll do the 'normal' version.  */
24604 func = rs6000_longcall_ref (func_desc, NULL_RTX);
24605 cookie_val &= ~CALL_LONG; /* Handled, zap it.  */
24609 /* Handle indirect calls.  */
24610 if (GET_CODE (func) != SYMBOL_REF)
24612 func = force_reg (Pmode, func);
24614 /* Indirect calls via CTR are strongly preferred over indirect
24615 calls via LR, and are required for indirect sibcalls, so move
24616 the address there.  */
24617 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24618 emit_move_insn (func_addr, func)
24620 else
24621 func_addr = func;
24623 /* Create the call: the call itself, a USE of the (possibly updated)
   cookie, and either a simple return (sibcall) or an LR clobber.  */
24624 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24625 if (value != NULL_RTX)
24626 call[0] = gen_rtx_SET (value, call[0]);
24628 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
24630 if (sibcall)
24631 call[2] = simple_return_rtx;
24632 else
24633 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24635 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
24636 insn = emit_call_insn (insn);
24637 /* Now we have the debug info in the insn, we can set up the branch island
24638 if we're using one.  */
24639 if (make_island)
24641 tree funname = get_identifier (XSTR (func_desc, 0));
24643 if (no_previous_def (funname))
24645 rtx label_rtx = gen_label_rtx ();
24646 char *label_buf, temp_buf[256];
24647 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
24648 CODE_LABEL_NUMBER (label_rtx));
/* ASM_GENERATE_INTERNAL_LABEL may prefix with '*'; strip it.  */
24649 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
24650 tree labelname = get_identifier (label_buf);
24651 add_compiler_branch_island (labelname, funname,
24652 insn_line ((const rtx_insn*)insn));
24656 #endif
/* Expand a (non-sibling) call under the Darwin ABI by forwarding to
   rs6000_call_darwin_1.  Unreachable on non-Mach-O targets.  */
24658 void
24659 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24660 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24662 #if TARGET_MACHO
24663 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
24664 #else
24665 gcc_unreachable();
24666 #endif
/* Expand a sibling call under the Darwin ABI by forwarding to
   rs6000_call_darwin_1.  Unreachable on non-Mach-O targets.  */
24670 void
24671 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24672 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24674 #if TARGET_MACHO
24675 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
24676 #else
24677 gcc_unreachable();
24678 #endif
24681 /* Return whether we should generate PC-relative code for FNDECL. */
24682 bool
24683 rs6000_fndecl_pcrel_p (const_tree fndecl)
24685 if (DEFAULT_ABI != ABI_ELFv2)
24686 return false;
24688 struct cl_target_option *opts = target_opts_for_fn (fndecl);
24690 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24691 && TARGET_CMODEL == CMODEL_MEDIUM);
24694 /* Return whether we should generate PC-relative code for *FN. */
24695 bool
24696 rs6000_pcrel_p (struct function *fn)
24698 if (DEFAULT_ABI != ABI_ELFv2)
24699 return false;
24701 /* Optimize usual case. */
24702 if (fn == cfun)
24703 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24704 && TARGET_CMODEL == CMODEL_MEDIUM);
24706 return rs6000_fndecl_pcrel_p (fn->decl);
24710 /* Given an address (ADDR), a mode (MODE), and what the format of the
24711 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
24712 for the address. */
24714 enum insn_form
24715 address_to_insn_form (rtx addr,
24716 machine_mode mode,
24717 enum non_prefixed_form non_prefixed_format)
24719 /* Single register is easy. */
24720 if (REG_P (addr) || SUBREG_P (addr))
24721 return INSN_FORM_BASE_REG;
24723 /* If the non prefixed instruction format doesn't support offset addressing,
24724 make sure only indexed addressing is allowed.
24726 We special case SDmode so that the register allocator does not try to move
24727 SDmode through GPR registers, but instead uses the 32-bit integer load and
24728 store instructions for the floating point registers. */
24729 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
24731 if (GET_CODE (addr) != PLUS)
24732 return INSN_FORM_BAD;
24734 rtx op0 = XEXP (addr, 0);
24735 rtx op1 = XEXP (addr, 1);
24736 if (!REG_P (op0) && !SUBREG_P (op0))
24737 return INSN_FORM_BAD;
24739 if (!REG_P (op1) && !SUBREG_P (op1))
24740 return INSN_FORM_BAD;
24742 return INSN_FORM_X;
24745 /* Deal with update forms. */
24746 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
24747 return INSN_FORM_UPDATE;
24749 /* Handle PC-relative symbols and labels. Check for both local and external
24750 symbols. Assume labels are always local. */
24751 if (TARGET_PCREL)
24753 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_LOCAL_P (addr))
24754 return INSN_FORM_PCREL_EXTERNAL;
24756 if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
24757 return INSN_FORM_PCREL_LOCAL;
24760 if (GET_CODE (addr) == CONST)
24761 addr = XEXP (addr, 0);
24763 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
24764 if (GET_CODE (addr) == LO_SUM)
24765 return INSN_FORM_LO_SUM;
24767 /* Everything below must be an offset address of some form. */
24768 if (GET_CODE (addr) != PLUS)
24769 return INSN_FORM_BAD;
24771 rtx op0 = XEXP (addr, 0);
24772 rtx op1 = XEXP (addr, 1);
24774 /* Check for indexed addresses. */
24775 if (REG_P (op1) || SUBREG_P (op1))
24777 if (REG_P (op0) || SUBREG_P (op0))
24778 return INSN_FORM_X;
24780 return INSN_FORM_BAD;
24783 if (!CONST_INT_P (op1))
24784 return INSN_FORM_BAD;
24786 HOST_WIDE_INT offset = INTVAL (op1);
24787 if (!SIGNED_34BIT_OFFSET_P (offset))
24788 return INSN_FORM_BAD;
24790 /* Check for local and external PC-relative addresses. Labels are always
24791 local. */
24792 if (TARGET_PCREL)
24794 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_LOCAL_P (op0))
24795 return INSN_FORM_PCREL_EXTERNAL;
24797 if (SYMBOL_REF_P (op0) || LABEL_REF_P (op0))
24798 return INSN_FORM_PCREL_LOCAL;
24801 /* If it isn't PC-relative, the address must use a base register. */
24802 if (!REG_P (op0) && !SUBREG_P (op0))
24803 return INSN_FORM_BAD;
24805 /* Large offsets must be prefixed. */
24806 if (!SIGNED_16BIT_OFFSET_P (offset))
24808 if (TARGET_PREFIXED_ADDR)
24809 return INSN_FORM_PREFIXED_NUMERIC;
24811 return INSN_FORM_BAD;
24814 /* We have a 16-bit offset, see what default instruction format to use. */
24815 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
24817 unsigned size = GET_MODE_SIZE (mode);
24819 /* On 64-bit systems, assume 64-bit integers need to use DS form
24820 addresses (for LD/STD). VSX vectors need to use DQ form addresses
24821 (for LXV and STXV). TImode is problematical in that its normal usage
24822 is expected to be GPRs where it wants a DS instruction format, but if
24823 it goes into the vector registers, it wants a DQ instruction
24824 format. */
24825 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
24826 non_prefixed_format = NON_PREFIXED_DS;
24828 else if (TARGET_VSX && size >= 16
24829 && (VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode)))
24830 non_prefixed_format = NON_PREFIXED_DQ;
24832 else
24833 non_prefixed_format = NON_PREFIXED_D;
24836 /* Classify the D/DS/DQ-form addresses. */
24837 switch (non_prefixed_format)
24839 /* Instruction format D, all 16 bits are valid. */
24840 case NON_PREFIXED_D:
24841 return INSN_FORM_D;
24843 /* Instruction format DS, bottom 2 bits must be 0. */
24844 case NON_PREFIXED_DS:
24845 if ((offset & 3) == 0)
24846 return INSN_FORM_DS;
24848 else if (TARGET_PREFIXED_ADDR)
24849 return INSN_FORM_PREFIXED_NUMERIC;
24851 else
24852 return INSN_FORM_BAD;
24854 /* Instruction format DQ, bottom 4 bits must be 0. */
24855 case NON_PREFIXED_DQ:
24856 if ((offset & 15) == 0)
24857 return INSN_FORM_DQ;
24859 else if (TARGET_PREFIXED_ADDR)
24860 return INSN_FORM_PREFIXED_NUMERIC;
24862 else
24863 return INSN_FORM_BAD;
24865 default:
24866 break;
24869 return INSN_FORM_BAD;
24872 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
24873 instruction format (D/DS/DQ) used for offset memory. */
24875 static enum non_prefixed_form
24876 reg_to_non_prefixed (rtx reg, machine_mode mode)
24878 /* If it isn't a register, use the defaults. */
24879 if (!REG_P (reg) && !SUBREG_P (reg))
24880 return NON_PREFIXED_DEFAULT;
24882 unsigned int r = reg_or_subregno (reg);
24884 /* If we have a pseudo, use the default instruction format. */
24885 if (!HARD_REGISTER_NUM_P (r))
24886 return NON_PREFIXED_DEFAULT;
24888 unsigned size = GET_MODE_SIZE (mode);
24890 /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
24891 128-bit floating point, and 128-bit integers. */
24892 if (FP_REGNO_P (r))
24894 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
24895 return NON_PREFIXED_D;
24897 else if (size < 8)
24898 return NON_PREFIXED_X;
24900 else if (TARGET_VSX && size >= 16
24901 && (VECTOR_MODE_P (mode)
24902 || FLOAT128_VECTOR_P (mode)
24903 || mode == TImode || mode == CTImode))
24904 return NON_PREFIXED_DQ;
24906 else
24907 return NON_PREFIXED_DEFAULT;
24910 /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
24911 128-bit floating point, and 128-bit integers. */
24912 else if (ALTIVEC_REGNO_P (r))
24914 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
24915 return NON_PREFIXED_DS;
24917 else if (size < 8)
24918 return NON_PREFIXED_X;
24920 else if (TARGET_VSX && size >= 16
24921 && (VECTOR_MODE_P (mode)
24922 || FLOAT128_VECTOR_P (mode)
24923 || mode == TImode || mode == CTImode))
24924 return NON_PREFIXED_DQ;
24926 else
24927 return NON_PREFIXED_DEFAULT;
24930 /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
24931 otherwise. Assume that any other register, such as LR, CRs, etc. will go
24932 through the GPR registers for memory operations. */
24933 else if (TARGET_POWERPC64 && size >= 8)
24934 return NON_PREFIXED_DS;
24936 return NON_PREFIXED_D;
24940 /* Whether a load instruction is a prefixed instruction. This is called from
24941 the prefixed attribute processing. */
24943 bool
24944 prefixed_load_p (rtx_insn *insn)
24946 /* Validate the insn to make sure it is a normal load insn. */
24947 extract_insn_cached (insn);
24948 if (recog_data.n_operands < 2)
24949 return false;
24951 rtx reg = recog_data.operand[0];
24952 rtx mem = recog_data.operand[1];
24954 if (!REG_P (reg) && !SUBREG_P (reg))
24955 return false;
24957 if (!MEM_P (mem))
24958 return false;
24960 /* Prefixed load instructions do not support update or indexed forms. */
24961 if (get_attr_indexed (insn) == INDEXED_YES
24962 || get_attr_update (insn) == UPDATE_YES)
24963 return false;
24965 /* LWA uses the DS format instead of the D format that LWZ uses. */
24966 enum non_prefixed_form non_prefixed;
24967 machine_mode reg_mode = GET_MODE (reg);
24968 machine_mode mem_mode = GET_MODE (mem);
24970 if (mem_mode == SImode && reg_mode == DImode
24971 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
24972 non_prefixed = NON_PREFIXED_DS;
24974 else
24975 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
24977 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
24980 /* Whether a store instruction is a prefixed instruction. This is called from
24981 the prefixed attribute processing. */
24983 bool
24984 prefixed_store_p (rtx_insn *insn)
24986 /* Validate the insn to make sure it is a normal store insn. */
24987 extract_insn_cached (insn);
24988 if (recog_data.n_operands < 2)
24989 return false;
24991 rtx mem = recog_data.operand[0];
24992 rtx reg = recog_data.operand[1];
24994 if (!REG_P (reg) && !SUBREG_P (reg))
24995 return false;
24997 if (!MEM_P (mem))
24998 return false;
25000 /* Prefixed store instructions do not support update or indexed forms. */
25001 if (get_attr_indexed (insn) == INDEXED_YES
25002 || get_attr_update (insn) == UPDATE_YES)
25003 return false;
25005 machine_mode mem_mode = GET_MODE (mem);
25006 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25007 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
25010 /* Whether a load immediate or add instruction is a prefixed instruction. This
25011 is called from the prefixed attribute processing. */
25013 bool
25014 prefixed_paddi_p (rtx_insn *insn)
25016 rtx set = single_set (insn);
25017 if (!set)
25018 return false;
25020 rtx dest = SET_DEST (set);
25021 rtx src = SET_SRC (set);
25023 if (!REG_P (dest) && !SUBREG_P (dest))
25024 return false;
25026 /* Is this a load immediate that can't be done with a simple ADDI or
25027 ADDIS? */
25028 if (CONST_INT_P (src))
25029 return (satisfies_constraint_eI (src)
25030 && !satisfies_constraint_I (src)
25031 && !satisfies_constraint_L (src));
25033 /* Is this a PADDI instruction that can't be done with a simple ADDI or
25034 ADDIS? */
25035 if (GET_CODE (src) == PLUS)
25037 rtx op1 = XEXP (src, 1);
25039 return (CONST_INT_P (op1)
25040 && satisfies_constraint_eI (op1)
25041 && !satisfies_constraint_I (op1)
25042 && !satisfies_constraint_L (op1));
25045 /* If not, is it a load of a PC-relative address? */
25046 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
25047 return false;
25049 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
25050 return false;
25052 enum insn_form iform = address_to_insn_form (src, Pmode,
25053 NON_PREFIXED_DEFAULT);
25055 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
25058 /* Whether the next instruction needs a 'p' prefix issued before the
25059 instruction is printed out. */
25060 static bool next_insn_prefixed_p;
25062 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
25063 outputting the assembler code. On the PowerPC, we remember if the current
25064 insn is a prefixed insn where we need to emit a 'p' before the insn.
25066 In addition, if the insn is part of a PC-relative reference to an external
25067 label optimization, this is recorded also. */
25068 void
25069 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
25071 next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
25072 return;
25075 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
25076 We use it to emit a 'p' for prefixed insns that is set in
25077 FINAL_PRESCAN_INSN. */
25078 void
25079 rs6000_asm_output_opcode (FILE *stream)
25081 if (next_insn_prefixed_p)
25082 fprintf (stream, "p");
25084 return;
25087 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
25088 should be adjusted to reflect any required changes. This macro is used when
25089 there is some systematic length adjustment required that would be difficult
25090 to express in the length attribute.
25092 In the PowerPC, we use this to adjust the length of an instruction if one or
25093 more prefixed instructions are generated, using the attribute
25094 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
25095 hardware requires that a prefied instruciton does not cross a 64-byte
25096 boundary. This means the compiler has to assume the length of the first
25097 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
25098 already set for the non-prefixed instruction, we just need to udpate for the
25099 difference. */
25102 rs6000_adjust_insn_length (rtx_insn *insn, int length)
25104 if (TARGET_PREFIXED_ADDR && NONJUMP_INSN_P (insn))
25106 rtx pattern = PATTERN (insn);
25107 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
25108 && get_attr_prefixed (insn) == PREFIXED_YES)
25110 int num_prefixed = get_attr_max_prefixed_insns (insn);
25111 length += 4 * (num_prefixed + 1);
25115 return length;
25119 #ifdef HAVE_GAS_HIDDEN
25120 # define USE_HIDDEN_LINKONCE 1
25121 #else
25122 # define USE_HIDDEN_LINKONCE 0
25123 #endif
25125 /* Fills in the label name that should be used for a 476 link stack thunk. */
25127 void
25128 get_ppc476_thunk_name (char name[32])
25130 gcc_assert (TARGET_LINK_STACK);
25132 if (USE_HIDDEN_LINKONCE)
25133 sprintf (name, "__ppc476.get_thunk");
25134 else
25135 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
25138 /* This function emits the simple thunk routine that is used to preserve
25139 the link stack on the 476 cpu. */
25141 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
25142 static void
25143 rs6000_code_end (void)
25145 char name[32];
25146 tree decl;
25148 if (!TARGET_LINK_STACK)
25149 return;
25151 get_ppc476_thunk_name (name);
25153 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
25154 build_function_type_list (void_type_node, NULL_TREE));
25155 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
25156 NULL_TREE, void_type_node);
25157 TREE_PUBLIC (decl) = 1;
25158 TREE_STATIC (decl) = 1;
25160 #if RS6000_WEAK
25161 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
25163 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
25164 targetm.asm_out.unique_section (decl, 0);
25165 switch_to_section (get_named_section (decl, NULL, 0));
25166 DECL_WEAK (decl) = 1;
25167 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
25168 targetm.asm_out.globalize_label (asm_out_file, name);
25169 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
25170 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
25172 else
25173 #endif
25175 switch_to_section (text_section);
25176 ASM_OUTPUT_LABEL (asm_out_file, name);
25179 DECL_INITIAL (decl) = make_node (BLOCK);
25180 current_function_decl = decl;
25181 allocate_struct_function (decl, false);
25182 init_function_start (decl);
25183 first_function_block_is_cold = false;
25184 /* Make sure unwind info is emitted for the thunk if needed. */
25185 final_start_function (emit_barrier (), asm_out_file, 1);
25187 fputs ("\tblr\n", asm_out_file);
25189 final_end_function ();
25190 init_insn_lengths ();
25191 free_after_compilation (cfun);
25192 set_cfun (NULL);
25193 current_function_decl = NULL;
25196 /* Add r30 to hard reg set if the prologue sets it up and it is not
25197 pic_offset_table_rtx. */
25199 static void
25200 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
25202 if (!TARGET_SINGLE_PIC_BASE
25203 && TARGET_TOC
25204 && TARGET_MINIMAL_TOC
25205 && !constant_pool_empty_p ())
25206 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
25207 if (cfun->machine->split_stack_argp_used)
25208 add_to_hard_reg_set (&set->set, Pmode, 12);
25210 /* Make sure the hard reg set doesn't include r2, which was possibly added
25211 via PIC_OFFSET_TABLE_REGNUM. */
25212 if (TARGET_TOC)
25213 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
25217 /* Helper function for rs6000_split_logical to emit a logical instruction after
25218 spliting the operation to single GPR registers.
25220 DEST is the destination register.
25221 OP1 and OP2 are the input source registers.
25222 CODE is the base operation (AND, IOR, XOR, NOT).
25223 MODE is the machine mode.
25224 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25225 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25226 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25228 static void
25229 rs6000_split_logical_inner (rtx dest,
25230 rtx op1,
25231 rtx op2,
25232 enum rtx_code code,
25233 machine_mode mode,
25234 bool complement_final_p,
25235 bool complement_op1_p,
25236 bool complement_op2_p)
25238 rtx bool_rtx;
25240 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
25241 if (op2 && CONST_INT_P (op2)
25242 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
25243 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25245 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
25246 HOST_WIDE_INT value = INTVAL (op2) & mask;
25248 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
25249 if (code == AND)
25251 if (value == 0)
25253 emit_insn (gen_rtx_SET (dest, const0_rtx));
25254 return;
25257 else if (value == mask)
25259 if (!rtx_equal_p (dest, op1))
25260 emit_insn (gen_rtx_SET (dest, op1));
25261 return;
25265 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
25266 into separate ORI/ORIS or XORI/XORIS instrucitons. */
25267 else if (code == IOR || code == XOR)
25269 if (value == 0)
25271 if (!rtx_equal_p (dest, op1))
25272 emit_insn (gen_rtx_SET (dest, op1));
25273 return;
25278 if (code == AND && mode == SImode
25279 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25281 emit_insn (gen_andsi3 (dest, op1, op2));
25282 return;
25285 if (complement_op1_p)
25286 op1 = gen_rtx_NOT (mode, op1);
25288 if (complement_op2_p)
25289 op2 = gen_rtx_NOT (mode, op2);
25291 /* For canonical RTL, if only one arm is inverted it is the first. */
25292 if (!complement_op1_p && complement_op2_p)
25293 std::swap (op1, op2);
25295 bool_rtx = ((code == NOT)
25296 ? gen_rtx_NOT (mode, op1)
25297 : gen_rtx_fmt_ee (code, mode, op1, op2));
25299 if (complement_final_p)
25300 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
25302 emit_insn (gen_rtx_SET (dest, bool_rtx));
25305 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
25306 operations are split immediately during RTL generation to allow for more
25307 optimizations of the AND/IOR/XOR.
25309 OPERANDS is an array containing the destination and two input operands.
25310 CODE is the base operation (AND, IOR, XOR, NOT).
25311 MODE is the machine mode.
25312 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25313 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25314 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
25315 CLOBBER_REG is either NULL or a scratch register of type CC to allow
25316 formation of the AND instructions. */
25318 static void
25319 rs6000_split_logical_di (rtx operands[3],
25320 enum rtx_code code,
25321 bool complement_final_p,
25322 bool complement_op1_p,
25323 bool complement_op2_p)
25325 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
25326 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
25327 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
25328 enum hi_lo { hi = 0, lo = 1 };
25329 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
25330 size_t i;
25332 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
25333 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
25334 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
25335 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
25337 if (code == NOT)
25338 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
25339 else
25341 if (!CONST_INT_P (operands[2]))
25343 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
25344 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
25346 else
25348 HOST_WIDE_INT value = INTVAL (operands[2]);
25349 HOST_WIDE_INT value_hi_lo[2];
25351 gcc_assert (!complement_final_p);
25352 gcc_assert (!complement_op1_p);
25353 gcc_assert (!complement_op2_p);
25355 value_hi_lo[hi] = value >> 32;
25356 value_hi_lo[lo] = value & lower_32bits;
25358 for (i = 0; i < 2; i++)
25360 HOST_WIDE_INT sub_value = value_hi_lo[i];
25362 if (sub_value & sign_bit)
25363 sub_value |= upper_32bits;
25365 op2_hi_lo[i] = GEN_INT (sub_value);
25367 /* If this is an AND instruction, check to see if we need to load
25368 the value in a register. */
25369 if (code == AND && sub_value != -1 && sub_value != 0
25370 && !and_operand (op2_hi_lo[i], SImode))
25371 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
25376 for (i = 0; i < 2; i++)
25378 /* Split large IOR/XOR operations. */
25379 if ((code == IOR || code == XOR)
25380 && CONST_INT_P (op2_hi_lo[i])
25381 && !complement_final_p
25382 && !complement_op1_p
25383 && !complement_op2_p
25384 && !logical_const_operand (op2_hi_lo[i], SImode))
25386 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
25387 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
25388 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
25389 rtx tmp = gen_reg_rtx (SImode);
25391 /* Make sure the constant is sign extended. */
25392 if ((hi_16bits & sign_bit) != 0)
25393 hi_16bits |= upper_32bits;
25395 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
25396 code, SImode, false, false, false);
25398 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
25399 code, SImode, false, false, false);
25401 else
25402 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
25403 code, SImode, complement_final_p,
25404 complement_op1_p, complement_op2_p);
25407 return;
25410 /* Split the insns that make up boolean operations operating on multiple GPR
25411 registers. The boolean MD patterns ensure that the inputs either are
25412 exactly the same as the output registers, or there is no overlap.
25414 OPERANDS is an array containing the destination and two input operands.
25415 CODE is the base operation (AND, IOR, XOR, NOT).
25416 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25417 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25418 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25420 void
25421 rs6000_split_logical (rtx operands[3],
25422 enum rtx_code code,
25423 bool complement_final_p,
25424 bool complement_op1_p,
25425 bool complement_op2_p)
25427 machine_mode mode = GET_MODE (operands[0]);
25428 machine_mode sub_mode;
25429 rtx op0, op1, op2;
25430 int sub_size, regno0, regno1, nregs, i;
25432 /* If this is DImode, use the specialized version that can run before
25433 register allocation. */
25434 if (mode == DImode && !TARGET_POWERPC64)
25436 rs6000_split_logical_di (operands, code, complement_final_p,
25437 complement_op1_p, complement_op2_p);
25438 return;
25441 op0 = operands[0];
25442 op1 = operands[1];
25443 op2 = (code == NOT) ? NULL_RTX : operands[2];
25444 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
25445 sub_size = GET_MODE_SIZE (sub_mode);
25446 regno0 = REGNO (op0);
25447 regno1 = REGNO (op1);
25449 gcc_assert (reload_completed);
25450 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25451 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25453 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
25454 gcc_assert (nregs > 1);
25456 if (op2 && REG_P (op2))
25457 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
25459 for (i = 0; i < nregs; i++)
25461 int offset = i * sub_size;
25462 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
25463 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
25464 rtx sub_op2 = ((code == NOT)
25465 ? NULL_RTX
25466 : simplify_subreg (sub_mode, op2, mode, offset));
25468 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
25469 complement_final_p, complement_op1_p,
25470 complement_op2_p);
25473 return;
25477 /* Return true if the peephole2 can combine a load involving a combination of
25478 an addis instruction and a load with an offset that can be fused together on
25479 a power8. */
25481 bool
25482 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
25483 rtx addis_value, /* addis value. */
25484 rtx target, /* target register that is loaded. */
25485 rtx mem) /* bottom part of the memory addr. */
25487 rtx addr;
25488 rtx base_reg;
25490 /* Validate arguments. */
25491 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
25492 return false;
25494 if (!base_reg_operand (target, GET_MODE (target)))
25495 return false;
25497 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
25498 return false;
25500 /* Allow sign/zero extension. */
25501 if (GET_CODE (mem) == ZERO_EXTEND
25502 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
25503 mem = XEXP (mem, 0);
25505 if (!MEM_P (mem))
25506 return false;
25508 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
25509 return false;
25511 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
25512 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
25513 return false;
25515 /* Validate that the register used to load the high value is either the
25516 register being loaded, or we can safely replace its use.
25518 This function is only called from the peephole2 pass and we assume that
25519 there are 2 instructions in the peephole (addis and load), so we want to
25520 check if the target register was not used in the memory address and the
25521 register to hold the addis result is dead after the peephole. */
25522 if (REGNO (addis_reg) != REGNO (target))
25524 if (reg_mentioned_p (target, mem))
25525 return false;
25527 if (!peep2_reg_dead_p (2, addis_reg))
25528 return false;
25530 /* If the target register being loaded is the stack pointer, we must
25531 avoid loading any other value into it, even temporarily. */
25532 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
25533 return false;
25536 base_reg = XEXP (addr, 0);
25537 return REGNO (addis_reg) == REGNO (base_reg);
25540 /* During the peephole2 pass, adjust and expand the insns for a load fusion
25541 sequence. We adjust the addis register to use the target register. If the
25542 load sign extends, we adjust the code to do the zero extending load, and an
25543 explicit sign extension later since the fusion only covers zero extending
25544 loads.
25546 The operands are:
25547 operands[0] register set with addis (to be replaced with target)
25548 operands[1] value set via addis
25549 operands[2] target register being loaded
25550 operands[3] D-form memory reference using operands[0]. */
25552 void
25553 expand_fusion_gpr_load (rtx *operands)
25555 rtx addis_value = operands[1];
25556 rtx target = operands[2];
25557 rtx orig_mem = operands[3];
25558 rtx new_addr, new_mem, orig_addr, offset;
25559 enum rtx_code plus_or_lo_sum;
25560 machine_mode target_mode = GET_MODE (target);
25561 machine_mode extend_mode = target_mode;
25562 machine_mode ptr_mode = Pmode;
25563 enum rtx_code extend = UNKNOWN;
25565 if (GET_CODE (orig_mem) == ZERO_EXTEND
25566 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
25568 extend = GET_CODE (orig_mem);
25569 orig_mem = XEXP (orig_mem, 0);
25570 target_mode = GET_MODE (orig_mem);
25573 gcc_assert (MEM_P (orig_mem));
25575 orig_addr = XEXP (orig_mem, 0);
25576 plus_or_lo_sum = GET_CODE (orig_addr);
25577 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
25579 offset = XEXP (orig_addr, 1);
25580 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
25581 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
25583 if (extend != UNKNOWN)
25584 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
25586 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
25587 UNSPEC_FUSION_GPR);
25588 emit_insn (gen_rtx_SET (target, new_mem));
25590 if (extend == SIGN_EXTEND)
25592 int sub_off = ((BYTES_BIG_ENDIAN)
25593 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
25594 : 0);
25595 rtx sign_reg
25596 = simplify_subreg (target_mode, target, extend_mode, sub_off);
25598 emit_insn (gen_rtx_SET (target,
25599 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
25602 return;
25605 /* Emit the addis instruction that will be part of a fused instruction
25606 sequence. */
25608 void
25609 emit_fusion_addis (rtx target, rtx addis_value)
25611 rtx fuse_ops[10];
25612 const char *addis_str = NULL;
25614 /* Emit the addis instruction. */
25615 fuse_ops[0] = target;
25616 if (satisfies_constraint_L (addis_value))
25618 fuse_ops[1] = addis_value;
25619 addis_str = "lis %0,%v1";
25622 else if (GET_CODE (addis_value) == PLUS)
25624 rtx op0 = XEXP (addis_value, 0);
25625 rtx op1 = XEXP (addis_value, 1);
25627 if (REG_P (op0) && CONST_INT_P (op1)
25628 && satisfies_constraint_L (op1))
25630 fuse_ops[1] = op0;
25631 fuse_ops[2] = op1;
25632 addis_str = "addis %0,%1,%v2";
25636 else if (GET_CODE (addis_value) == HIGH)
25638 rtx value = XEXP (addis_value, 0);
25639 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
25641 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
25642 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
25643 if (TARGET_ELF)
25644 addis_str = "addis %0,%2,%1@toc@ha";
25646 else if (TARGET_XCOFF)
25647 addis_str = "addis %0,%1@u(%2)";
25649 else
25650 gcc_unreachable ();
25653 else if (GET_CODE (value) == PLUS)
25655 rtx op0 = XEXP (value, 0);
25656 rtx op1 = XEXP (value, 1);
25658 if (GET_CODE (op0) == UNSPEC
25659 && XINT (op0, 1) == UNSPEC_TOCREL
25660 && CONST_INT_P (op1))
25662 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
25663 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
25664 fuse_ops[3] = op1;
25665 if (TARGET_ELF)
25666 addis_str = "addis %0,%2,%1+%3@toc@ha";
25668 else if (TARGET_XCOFF)
25669 addis_str = "addis %0,%1+%3@u(%2)";
25671 else
25672 gcc_unreachable ();
25676 else if (satisfies_constraint_L (value))
25678 fuse_ops[1] = value;
25679 addis_str = "lis %0,%v1";
25682 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
25684 fuse_ops[1] = value;
25685 addis_str = "lis %0,%1@ha";
25689 if (!addis_str)
25690 fatal_insn ("Could not generate addis value for fusion", addis_value);
25692 output_asm_insn (addis_str, fuse_ops);
25695 /* Emit a D-form load or store instruction that is the second instruction
25696 of a fusion sequence. */
25698 static void
25699 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
25701 rtx fuse_ops[10];
25702 char insn_template[80];
25704 fuse_ops[0] = load_reg;
25705 fuse_ops[1] = addis_reg;
25707 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
25709 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
25710 fuse_ops[2] = offset;
25711 output_asm_insn (insn_template, fuse_ops);
25714 else if (GET_CODE (offset) == UNSPEC
25715 && XINT (offset, 1) == UNSPEC_TOCREL)
25717 if (TARGET_ELF)
25718 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
25720 else if (TARGET_XCOFF)
25721 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25723 else
25724 gcc_unreachable ();
25726 fuse_ops[2] = XVECEXP (offset, 0, 0);
25727 output_asm_insn (insn_template, fuse_ops);
25730 else if (GET_CODE (offset) == PLUS
25731 && GET_CODE (XEXP (offset, 0)) == UNSPEC
25732 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
25733 && CONST_INT_P (XEXP (offset, 1)))
25735 rtx tocrel_unspec = XEXP (offset, 0);
25736 if (TARGET_ELF)
25737 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
25739 else if (TARGET_XCOFF)
25740 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
25742 else
25743 gcc_unreachable ();
25745 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
25746 fuse_ops[3] = XEXP (offset, 1);
25747 output_asm_insn (insn_template, fuse_ops);
25750 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
25752 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25754 fuse_ops[2] = offset;
25755 output_asm_insn (insn_template, fuse_ops);
25758 else
25759 fatal_insn ("Unable to generate load/store offset for fusion", offset);
25761 return;
25764 /* Given an address, convert it into the addis and load offset parts. Addresses
25765 created during the peephole2 process look like:
25766 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
25767 (unspec [(...)] UNSPEC_TOCREL)) */
25769 static void
25770 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
25772 rtx hi, lo;
25774 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
25776 hi = XEXP (addr, 0);
25777 lo = XEXP (addr, 1);
25779 else
25780 gcc_unreachable ();
25782 *p_hi = hi;
25783 *p_lo = lo;
25786 /* Return a string to fuse an addis instruction with a gpr load to the same
25787 register that we loaded up the addis instruction. The address that is used
25788 is the logical address that was formed during peephole2:
25789 (lo_sum (high) (low-part))
25791 The code is complicated, so we call output_asm_insn directly, and just
25792 return "". */
25794 const char *
25795 emit_fusion_gpr_load (rtx target, rtx mem)
25797 rtx addis_value;
25798 rtx addr;
25799 rtx load_offset;
25800 const char *load_str = NULL;
25801 machine_mode mode;
25803 if (GET_CODE (mem) == ZERO_EXTEND)
25804 mem = XEXP (mem, 0);
25806 gcc_assert (REG_P (target) && MEM_P (mem));
25808 addr = XEXP (mem, 0);
25809 fusion_split_address (addr, &addis_value, &load_offset);
25811 /* Now emit the load instruction to the same register. */
25812 mode = GET_MODE (mem);
25813 switch (mode)
25815 case E_QImode:
25816 load_str = "lbz";
25817 break;
25819 case E_HImode:
25820 load_str = "lhz";
25821 break;
25823 case E_SImode:
25824 case E_SFmode:
25825 load_str = "lwz";
25826 break;
25828 case E_DImode:
25829 case E_DFmode:
25830 gcc_assert (TARGET_POWERPC64);
25831 load_str = "ld";
25832 break;
25834 default:
25835 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
25838 /* Emit the addis instruction. */
25839 emit_fusion_addis (target, addis_value);
25841 /* Emit the D-form load instruction. */
25842 emit_fusion_load (target, target, load_offset, load_str);
25844 return "";
#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  These cache
   the glibc __atomic_fe* helper FUNCTION_DECLs; they are built lazily on
   first use and reused thereafter.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  Build the three tree
   expressions used to hold (*HOLD), clear (*CLEAR) and restore (*UPDATE) the
   floating point exception state around a C11 atomic compound assignment.  */

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  /* Without hardware floating point, defer to the glibc helper functions if
     this configuration provides them; otherwise leave the three outputs
     untouched (nothing to save or restore).  */
  if (!TARGET_HARD_FLOAT)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      /* Lazily build "void __atomic_feholdexcept (double *)".  */
      if (atomic_hold_decl == NULL_TREE)
	{
	  atomic_hold_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feholdexcept"),
			  build_function_type_list (void_type_node,
						    double_ptr_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_hold_decl) = 1;
	  DECL_EXTERNAL (atomic_hold_decl) = 1;
	}

      /* Lazily build "void __atomic_feclearexcept (void)".  */
      if (atomic_clear_decl == NULL_TREE)
	{
	  atomic_clear_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feclearexcept"),
			  build_function_type_list (void_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_clear_decl) = 1;
	  DECL_EXTERNAL (atomic_clear_decl) = 1;
	}

      tree const_double = build_qualified_type (double_type_node,
						TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      /* Lazily build "void __atomic_feupdateenv (const double *)".  */
      if (atomic_update_decl == NULL_TREE)
	{
	  atomic_update_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feupdateenv"),
			  build_function_type_list (void_type_node,
						    const_double_ptr,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_update_decl) = 1;
	  DECL_EXTERNAL (atomic_update_decl) = 1;
	}

      /* The saved environment lives in an addressable double temporary that
	 is passed by address to hold and update.  */
      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
				 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  /* Hard float: open-code the sequences with the mffs/mtfsf built-ins.  */
  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var)

     *fenv_var = __builtin_mffs ();
     double fenv_hold;
     *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
     __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);

  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  /* mffs returns the FPSCR in a double; view-convert to uint64 so the bit
     masking can be expressed as integer arithmetic.  */
  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			      build_int_cst (uint64_type_node,
					     hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
				     build_int_cst (unsigned_type_node, 0xff),
				     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

     double fenv_clear = __builtin_mffs ();
     *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
     __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear the exception status and enable bits in the low word of
     the FPSCR.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
				    fenv_clean_llu,
				    build_int_cst (uint64_type_node,
						   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
				      build_int_cst (unsigned_type_node, 0xff),
				      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var)

     double old_fenv = __builtin_mffs ();
     double fenv_update;
     *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) |
				(*(uint64_t*)fenv_var & 0x1ff80fff);
     __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
			     build_int_cst (uint64_type_node,
					    update_exception_mask));

  /* Note: fenv_llu re-reads fenv_var, the environment saved by *hold.  */
  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			     build_int_cst (uint64_type_node,
					    new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
			      old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
				       build_int_cst (unsigned_type_node, 0xff),
				       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}
26013 void
26014 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
26016 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26018 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26019 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26021 /* The destination of the vmrgew instruction layout is:
26022 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
26023 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26024 vmrgew instruction will be correct. */
26025 if (BYTES_BIG_ENDIAN)
26027 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
26028 GEN_INT (0)));
26029 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
26030 GEN_INT (3)));
26032 else
26034 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
26035 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
26038 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26039 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26041 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
26042 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
26044 if (BYTES_BIG_ENDIAN)
26045 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26046 else
26047 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
26050 void
26051 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
26053 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26055 rtx_tmp0 = gen_reg_rtx (V2DImode);
26056 rtx_tmp1 = gen_reg_rtx (V2DImode);
26058 /* The destination of the vmrgew instruction layout is:
26059 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
26060 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26061 vmrgew instruction will be correct. */
26062 if (BYTES_BIG_ENDIAN)
26064 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
26065 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
26067 else
26069 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
26070 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
26073 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26074 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26076 if (signed_convert)
26078 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
26079 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
26081 else
26083 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
26084 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
26087 if (BYTES_BIG_ENDIAN)
26088 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26089 else
26090 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
26093 void
26094 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
26095 rtx src2)
26097 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26099 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26100 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26102 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
26103 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
26105 rtx_tmp2 = gen_reg_rtx (V4SImode);
26106 rtx_tmp3 = gen_reg_rtx (V4SImode);
26108 if (signed_convert)
26110 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
26111 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
26113 else
26115 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
26116 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
26119 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
26122 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
26124 static bool
26125 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
26126 optimization_type opt_type)
26128 switch (op)
26130 case rsqrt_optab:
26131 return (opt_type == OPTIMIZE_FOR_SPEED
26132 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
26134 default:
26135 return true;
26139 /* Implement TARGET_CONSTANT_ALIGNMENT. */
26141 static HOST_WIDE_INT
26142 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
26144 if (TREE_CODE (exp) == STRING_CST
26145 && (STRICT_ALIGNMENT || !optimize_size))
26146 return MAX (align, BITS_PER_WORD);
26147 return align;
26150 /* Implement TARGET_STARTING_FRAME_OFFSET. */
26152 static HOST_WIDE_INT
26153 rs6000_starting_frame_offset (void)
26155 if (FRAME_GROWS_DOWNWARD)
26156 return 0;
26157 return RS6000_STARTING_FRAME_OFFSET;
/* Create an alias for a mangled name where we have changed the mangling (in
   GCC 8.1, we used U10__float128, and now we use u9__ieee128).  This is called
   via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME.  */

#if TARGET_ELF && RS6000_WEAK
static void
rs6000_globalize_decl_name (FILE * stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

  targetm.asm_out.globalize_label (stream, name);

  /* Only C++ mangled names (prefix "_Z") of functions involved with IEEE
     128-bit values need the compatibility alias.  */
  if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
    {
      tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
      const char *old_name;

      /* Temporarily flip the global mangling flag so the language hook
	 regenerates the name using the GCC 8.1 scheme, then restore both the
	 flag and the decl's real assembler name.  */
      ieee128_mangling_gcc_8_1 = true;
      lang_hooks.set_decl_assembler_name (decl);
      old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
      SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
      ieee128_mangling_gcc_8_1 = false;

      /* If the old mangling differs, emit a weak alias from the old name to
	 the current one so GCC 8.1-compiled objects still link.  */
      if (strcmp (name, old_name) != 0)
	{
	  fprintf (stream, "\t.weak %s\n", old_name);
	  fprintf (stream, "\t.set %s,%s\n", old_name, name);
	}
    }
}
#endif
/* On 64-bit Linux and Freebsd systems, possibly switch the long double library
   function names from <foo>l to <foo>f128 if the default long double type is
   IEEE 128-bit.  Typically, with the C and C++ languages, the standard math.h
   include file switches the names on systems that support long double as IEEE
   128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
   In the future, glibc will export names like __ieee128_sinf128 and we can
   switch to using those instead of using sinf128, which pollutes the user's
   namespace.

   This will switch the names for Fortran math functions as well (which doesn't
   use math.h).  However, Fortran needs other changes to the compiler and
   library before you can switch the real*16 type at compile time.

   We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name.  We
   only do this if the default is that long double is IBM extended double, and
   the user asked for IEEE 128-bit.  */

static tree
rs6000_mangle_decl_assembler_name (tree decl, tree id)
{
  /* Only rewrite built-in FUNCTION_DECLs, and only when the user selected
     IEEE 128-bit long double on a configuration whose default is IBM
     extended double.  */
  if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
      && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl) )
    {
      size_t len = IDENTIFIER_LENGTH (id);
      const char *name = IDENTIFIER_POINTER (id);

      /* long double math built-ins conventionally end in 'l' (sinl, fmal).  */
      if (name[len - 1] == 'l')
	{
	  bool uses_ieee128_p = false;
	  tree type = TREE_TYPE (decl);
	  machine_mode ret_mode = TYPE_MODE (type);

	  /* See if the function returns a IEEE 128-bit floating point type or
	     complex type.  */
	  if (ret_mode == TFmode || ret_mode == TCmode)
	    uses_ieee128_p = true;
	  else
	    {
	      function_args_iterator args_iter;
	      tree arg;

	      /* See if the function passes a IEEE 128-bit floating point type
		 or complex type.  */
	      FOREACH_FUNCTION_ARGS (type, arg, args_iter)
		{
		  machine_mode arg_mode = TYPE_MODE (arg);
		  if (arg_mode == TFmode || arg_mode == TCmode)
		    {
		      uses_ieee128_p = true;
		      break;
		    }
		}
	    }

	  /* If we passed or returned an IEEE 128-bit floating point type,
	     change the name.  The buffer needs len - 1 prefix bytes plus 5
	     bytes for "f128" and its terminating NUL, i.e. len + 4 total.  */
	  if (uses_ieee128_p)
	    {
	      char *name2 = (char *) alloca (len + 4);
	      memcpy (name2, name, len - 1);
	      strcpy (name2 + len - 1, "f128");
	      id = get_identifier (name2);
	    }
	}
    }

  return id;
}
26263 /* Predict whether the given loop in gimple will be transformed in the RTL
26264 doloop_optimize pass. */
26266 static bool
26267 rs6000_predict_doloop_p (struct loop *loop)
26269 gcc_assert (loop);
26271 /* On rs6000, targetm.can_use_doloop_p is actually
26272 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
26273 if (loop->inner != NULL)
26275 if (dump_file && (dump_flags & TDF_DETAILS))
26276 fprintf (dump_file, "Predict doloop failure due to"
26277 " loop nesting.\n");
26278 return false;
26281 return true;
/* Instantiate the target hook vector from the TARGET_* macros defined in
   this file.  */
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"