1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2019 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #define IN_TARGET_CODE 1
24 #include "coretypes.h"
34 #include "stringpool.h"
41 #include "diagnostic.h"
44 #include "fold-const.h"
47 #include "stor-layout.h"
50 #include "insn-attr.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
61 #include "tm-constrs.h"
64 #include "sched-int.h"
66 #include "tree-pass.h"
68 #include "pass_manager.h"
69 #include "target-globals.h"
70 #include "gimple-iterator.h"
71 #include "tree-vectorizer.h"
72 #include "shrink-wrap.h"
75 #include "tree-iterator.h"
77 #include "case-cfn-macros.h"
79 #include "fold-const-call.h"
81 #include "tree-ssanames.h"
83 #include "selftest-rtl.h"
84 #include "print-rtl.h"
87 #include "symbol-summary.h"
89 #include "ipa-fnsummary.h"
90 #include "wide-int-bitmask.h"
91 #include "tree-vector-builder.h"
93 #include "dwarf2out.h"
94 #include "i386-options.h"
95 #include "i386-builtins.h"
96 #include "i386-expand.h"
97 #include "i386-features.h"
99 /* This file should be included last. */
100 #include "target-def.h"
102 static rtx
legitimize_dllimport_symbol (rtx
, bool);
103 static rtx
legitimize_pe_coff_extern_decl (rtx
, bool);
104 static void ix86_print_operand_address_as (FILE *, rtx
, addr_space_t
, bool);
105 static void ix86_emit_restore_reg_using_pop (rtx
);
/* Fallback stack-probe limit when the target headers do not define one;
   -1 means "no limit".  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)				\
  ((mode) == QImode ? 0					\
   : (mode) == HImode ? 1				\
   : (mode) == SImode ? 2				\
   : (mode) == DImode ? 3				\
   : 4)
122 const struct processor_costs
*ix86_tune_cost
= NULL
;
124 /* Set by -mtune or -Os. */
125 const struct processor_costs
*ix86_cost
= NULL
;
127 /* In case the average insn count for single function invocation is
128 lower than this constant, emit fast (but longer) prologue and
130 #define FAST_PROLOGUE_INSN_COUNT 20
132 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
133 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
134 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
135 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
137 /* Array of the smallest class containing reg number REGNO, indexed by
138 REGNO. Used by REGNO_REG_CLASS in i386.h. */
140 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
143 AREG
, DREG
, CREG
, BREG
,
145 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
147 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
148 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
149 /* arg pointer, flags, fpsr, frame */
150 NON_Q_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
152 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
153 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
155 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
156 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
158 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
159 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
160 /* SSE REX registers */
161 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
162 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
163 /* AVX-512 SSE registers */
164 ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
,
165 ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
,
166 ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
,
167 ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
,
168 /* Mask registers. */
169 ALL_MASK_REGS
, MASK_REGS
, MASK_REGS
, MASK_REGS
,
170 MASK_REGS
, MASK_REGS
, MASK_REGS
, MASK_REGS
173 /* The "default" register map used in 32bit mode. */
175 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
178 0, 2, 1, 3, 6, 7, 4, 5,
180 12, 13, 14, 15, 16, 17, 18, 19,
181 /* arg, flags, fpsr, frame */
182 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
183 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
185 21, 22, 23, 24, 25, 26, 27, 28,
187 29, 30, 31, 32, 33, 34, 35, 36,
188 /* extended integer registers */
189 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
190 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
191 /* extended sse registers */
192 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
193 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
194 /* AVX-512 registers 16-23 */
195 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
196 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
197 /* AVX-512 registers 24-31 */
198 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
199 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
201 93, 94, 95, 96, 97, 98, 99, 100
204 /* The "default" register map used in 64bit mode. */
206 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
209 0, 1, 2, 3, 4, 5, 6, 7,
211 33, 34, 35, 36, 37, 38, 39, 40,
212 /* arg, flags, fpsr, frame */
213 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
214 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
216 17, 18, 19, 20, 21, 22, 23, 24,
218 41, 42, 43, 44, 45, 46, 47, 48,
219 /* extended integer registers */
220 8, 9, 10, 11, 12, 13, 14, 15,
221 /* extended SSE registers */
222 25, 26, 27, 28, 29, 30, 31, 32,
223 /* AVX-512 registers 16-23 */
224 67, 68, 69, 70, 71, 72, 73, 74,
225 /* AVX-512 registers 24-31 */
226 75, 76, 77, 78, 79, 80, 81, 82,
228 118, 119, 120, 121, 122, 123, 124, 125
231 /* Define the register numbers to be used in Dwarf debugging information.
232 The SVR4 reference port C compiler uses the following register numbers
233 in its Dwarf output code:
234 0 for %eax (gcc regno = 0)
235 1 for %ecx (gcc regno = 2)
236 2 for %edx (gcc regno = 1)
237 3 for %ebx (gcc regno = 3)
238 4 for %esp (gcc regno = 7)
239 5 for %ebp (gcc regno = 6)
240 6 for %esi (gcc regno = 4)
241 7 for %edi (gcc regno = 5)
242 The following three DWARF register numbers are never generated by
243 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
244 believed these numbers have these meanings.
245 8 for %eip (no gcc equivalent)
246 9 for %eflags (gcc regno = 17)
247 10 for %trapno (no gcc equivalent)
248 It is not at all clear how we should number the FP stack registers
249 for the x86 architecture. If the version of SDB on x86/svr4 were
250 a bit less brain dead with respect to floating-point then we would
251 have a precedent to follow with respect to DWARF register numbers
252 for x86 FP registers, but the SDB on x86/svr4 was so completely
253 broken with respect to FP registers that it is hardly worth thinking
254 of it as something to strive for compatibility with.
255 The version of x86/svr4 SDB I had does (partially)
256 seem to believe that DWARF register number 11 is associated with
257 the x86 register %st(0), but that's about all. Higher DWARF
258 register numbers don't seem to be associated with anything in
259 particular, and even for DWARF regno 11, SDB only seemed to under-
260 stand that it should say that a variable lives in %st(0) (when
261 asked via an `=' command) if we said it was in DWARF regno 11,
262 but SDB still printed garbage when asked for the value of the
263 variable in question (via a `/' command).
264 (Also note that the labels SDB printed for various FP stack regs
265 when doing an `x' command were all wrong.)
266 Note that these problems generally don't affect the native SVR4
267 C compiler because it doesn't allow the use of -O with -g and
268 because when it is *not* optimizing, it allocates a memory
269 location for each floating-point variable, and the memory
270 location is what gets described in the DWARF AT_location
271 attribute for the variable in question.
272 Regardless of the severe mental illness of the x86/svr4 SDB, we
273 do something sensible here and we use the following DWARF
274 register numbers. Note that these are all stack-top-relative
276 11 for %st(0) (gcc regno = 8)
277 12 for %st(1) (gcc regno = 9)
278 13 for %st(2) (gcc regno = 10)
279 14 for %st(3) (gcc regno = 11)
280 15 for %st(4) (gcc regno = 12)
281 16 for %st(5) (gcc regno = 13)
282 17 for %st(6) (gcc regno = 14)
283 18 for %st(7) (gcc regno = 15)
285 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
288 0, 2, 1, 3, 6, 7, 5, 4,
290 11, 12, 13, 14, 15, 16, 17, 18,
291 /* arg, flags, fpsr, frame */
292 IGNORED_DWARF_REGNUM
, 9,
293 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
295 21, 22, 23, 24, 25, 26, 27, 28,
297 29, 30, 31, 32, 33, 34, 35, 36,
298 /* extended integer registers */
299 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
300 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
301 /* extended sse registers */
302 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
303 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
304 /* AVX-512 registers 16-23 */
305 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
306 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
307 /* AVX-512 registers 24-31 */
308 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
309 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
311 93, 94, 95, 96, 97, 98, 99, 100
314 /* Define parameter passing and return registers. */
316 static int const x86_64_int_parameter_registers
[6] =
318 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
321 static int const x86_64_ms_abi_int_parameter_registers
[4] =
323 CX_REG
, DX_REG
, R8_REG
, R9_REG
326 static int const x86_64_int_return_registers
[4] =
328 AX_REG
, DX_REG
, DI_REG
, SI_REG
331 /* Define the structure for the machine field in struct function. */
333 struct GTY(()) stack_local_entry
{
337 struct stack_local_entry
*next
;
340 /* Which cpu are we scheduling for. */
341 enum attr_cpu ix86_schedule
;
343 /* Which cpu are we optimizing for. */
344 enum processor_type ix86_tune
;
346 /* Which instruction set architecture to use. */
347 enum processor_type ix86_arch
;
349 /* True if processor has SSE prefetch instruction. */
350 unsigned char x86_prefetch_sse
;
352 rtx (*ix86_gen_leave
) (void);
353 rtx (*ix86_gen_add3
) (rtx
, rtx
, rtx
);
354 rtx (*ix86_gen_sub3
) (rtx
, rtx
, rtx
);
355 rtx (*ix86_gen_sub3_carry
) (rtx
, rtx
, rtx
, rtx
, rtx
);
356 rtx (*ix86_gen_one_cmpl2
) (rtx
, rtx
);
357 rtx (*ix86_gen_monitor
) (rtx
, rtx
, rtx
);
358 rtx (*ix86_gen_monitorx
) (rtx
, rtx
, rtx
);
359 rtx (*ix86_gen_clzero
) (rtx
);
360 rtx (*ix86_gen_andsp
) (rtx
, rtx
, rtx
);
361 rtx (*ix86_gen_allocate_stack_worker
) (rtx
, rtx
);
362 rtx (*ix86_gen_adjust_stack_and_probe
) (rtx
, rtx
, rtx
);
363 rtx (*ix86_gen_probe_stack_range
) (rtx
, rtx
, rtx
);
364 rtx (*ix86_gen_tls_global_dynamic_64
) (rtx
, rtx
, rtx
);
365 rtx (*ix86_gen_tls_local_dynamic_base_64
) (rtx
, rtx
);
367 /* Preferred alignment for stack boundary in bits. */
368 unsigned int ix86_preferred_stack_boundary
;
370 /* Alignment for incoming stack boundary in bits specified at
372 unsigned int ix86_user_incoming_stack_boundary
;
374 /* Default alignment for incoming stack boundary in bits. */
375 unsigned int ix86_default_incoming_stack_boundary
;
377 /* Alignment for incoming stack boundary in bits. */
378 unsigned int ix86_incoming_stack_boundary
;
380 /* Calling abi specific va_list type nodes. */
381 tree sysv_va_list_type_node
;
382 tree ms_va_list_type_node
;
384 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
385 char internal_label_prefix
[16];
386 int internal_label_prefix_len
;
388 /* Fence to use after loop using movnt. */
391 /* Register class used for passing given 64bit part of the argument.
392 These represent classes as documented by the PS ABI, with the exception
393 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
394 use SF or DFmode move instead of DImode to avoid reformatting penalties.
396 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
397 whenever possible (upper half does contain padding). */
398 enum x86_64_reg_class
401 X86_64_INTEGER_CLASS
,
402 X86_64_INTEGERSI_CLASS
,
409 X86_64_COMPLEX_X87_CLASS
,
413 #define MAX_CLASSES 8
415 /* Table of constants used by fldpi, fldln2, etc.... */
416 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
417 static bool ext_80387_constants_init
;
420 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
421 static bool ix86_function_value_regno_p (const unsigned int);
422 static unsigned int ix86_function_arg_boundary (machine_mode
,
424 static rtx
ix86_static_chain (const_tree
, bool);
425 static int ix86_function_regparm (const_tree
, const_tree
);
426 static void ix86_compute_frame_layout (void);
427 static tree
ix86_canonical_va_list_type (tree
);
428 static unsigned int split_stack_prologue_scratch_regno (void);
429 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
431 static bool ix86_can_inline_p (tree
, tree
);
432 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
/* Whether -mtune= or -march= were specified */
int ix86_tune_defaulted;
int ix86_arch_specified;
439 /* Return true if a red-zone is in use. We can't use red-zone when
440 there are local indirect jumps, like "indirect_jump" or "tablejump",
441 which jumps to another place in the function, since "call" in the
442 indirect thunk pushes the return address onto stack, destroying
445 TODO: If we can reserve the first 2 WORDs, for PUSH and, another
446 for CALL, in red-zone, we can allow local indirect jumps with
450 ix86_using_red_zone (void)
452 return (TARGET_RED_ZONE
453 && !TARGET_64BIT_MS_ABI
454 && (!cfun
->machine
->has_local_indirect_jump
455 || cfun
->machine
->indirect_branch_type
== indirect_branch_keep
));
458 /* Return true, if profiling code should be emitted before
459 prologue. Otherwise it returns false.
460 Note: For x86 with "hotfix" it is sorried. */
462 ix86_profile_before_prologue (void)
464 return flag_fentry
!= 0;
467 /* Update register usage after having seen the compiler flags. */
470 ix86_conditional_register_usage (void)
474 /* If there are no caller-saved registers, preserve all registers.
475 except fixed_regs and registers used for function return value
476 since aggregate_value_p checks call_used_regs[regno] on return
478 if (cfun
&& cfun
->machine
->no_caller_saved_registers
)
479 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
480 if (!fixed_regs
[i
] && !ix86_function_value_regno_p (i
))
481 call_used_regs
[i
] = 0;
483 /* For 32-bit targets, disable the REX registers. */
486 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
487 CLEAR_HARD_REG_BIT (accessible_reg_set
, i
);
488 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
489 CLEAR_HARD_REG_BIT (accessible_reg_set
, i
);
490 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
491 CLEAR_HARD_REG_BIT (accessible_reg_set
, i
);
494 /* See the definition of CALL_USED_REGISTERS in i386.h. */
495 c_mask
= CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI
);
497 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
499 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
501 /* Set/reset conditionally defined registers from
502 CALL_USED_REGISTERS initializer. */
503 if (call_used_regs
[i
] > 1)
504 call_used_regs
[i
] = !!(call_used_regs
[i
] & c_mask
);
506 /* Calculate registers of CLOBBERED_REGS register set
507 as call used registers from GENERAL_REGS register set. */
508 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
509 && call_used_regs
[i
])
510 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
513 /* If MMX is disabled, disable the registers. */
515 AND_COMPL_HARD_REG_SET (accessible_reg_set
,
516 reg_class_contents
[(int) MMX_REGS
]);
518 /* If SSE is disabled, disable the registers. */
520 AND_COMPL_HARD_REG_SET (accessible_reg_set
,
521 reg_class_contents
[(int) ALL_SSE_REGS
]);
523 /* If the FPU is disabled, disable the registers. */
524 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
525 AND_COMPL_HARD_REG_SET (accessible_reg_set
,
526 reg_class_contents
[(int) FLOAT_REGS
]);
528 /* If AVX512F is disabled, disable the registers. */
529 if (! TARGET_AVX512F
)
531 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
532 CLEAR_HARD_REG_BIT (accessible_reg_set
, i
);
534 AND_COMPL_HARD_REG_SET (accessible_reg_set
,
535 reg_class_contents
[(int) ALL_MASK_REGS
]);
539 /* Canonicalize a comparison from one we don't have to one we do have. */
542 ix86_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
543 bool op0_preserve_value
)
545 /* The order of operands in x87 ficom compare is forced by combine in
546 simplify_comparison () function. Float operator is treated as RTX_OBJ
547 with a precedence over other operators and is always put in the first
548 place. Swap condition and operands to match ficom instruction. */
549 if (!op0_preserve_value
550 && GET_CODE (*op0
) == FLOAT
&& MEM_P (XEXP (*op0
, 0)) && REG_P (*op1
))
552 enum rtx_code scode
= swap_condition ((enum rtx_code
) *code
);
554 /* We are called only for compares that are split to SAHF instruction.
555 Ensure that we have setcc/jcc insn for the swapped condition. */
556 if (ix86_fp_compare_code_to_integer (scode
) != UNKNOWN
)
558 std::swap (*op0
, *op1
);
565 /* Hook to determine if one function can safely inline another. */
568 ix86_can_inline_p (tree caller
, tree callee
)
570 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
571 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
573 /* Changes of those flags can be tolerated for always inlines. Lets hope
574 user knows what he is doing. */
575 const unsigned HOST_WIDE_INT always_inline_safe_mask
576 = (MASK_USE_8BIT_IDIV
| MASK_ACCUMULATE_OUTGOING_ARGS
577 | MASK_NO_ALIGN_STRINGOPS
| MASK_AVX256_SPLIT_UNALIGNED_LOAD
578 | MASK_AVX256_SPLIT_UNALIGNED_STORE
| MASK_CLD
579 | MASK_NO_FANCY_MATH_387
| MASK_IEEE_FP
| MASK_INLINE_ALL_STRINGOPS
580 | MASK_INLINE_STRINGOPS_DYNAMICALLY
| MASK_RECIP
| MASK_STACK_PROBE
581 | MASK_STV
| MASK_TLS_DIRECT_SEG_REFS
| MASK_VZEROUPPER
582 | MASK_NO_PUSH_ARGS
| MASK_OMIT_LEAF_FRAME_POINTER
);
586 callee_tree
= target_option_default_node
;
588 caller_tree
= target_option_default_node
;
589 if (callee_tree
== caller_tree
)
592 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
593 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
596 = (DECL_DISREGARD_INLINE_LIMITS (callee
)
597 && lookup_attribute ("always_inline",
598 DECL_ATTRIBUTES (callee
)));
600 cgraph_node
*callee_node
= cgraph_node::get (callee
);
601 /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
602 function can inline a SSE2 function but a SSE2 function can't inline
604 if (((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
605 != callee_opts
->x_ix86_isa_flags
)
606 || ((caller_opts
->x_ix86_isa_flags2
& callee_opts
->x_ix86_isa_flags2
)
607 != callee_opts
->x_ix86_isa_flags2
))
610 /* See if we have the same non-isa options. */
611 else if ((!always_inline
612 && caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
613 || (caller_opts
->x_target_flags
& ~always_inline_safe_mask
)
614 != (callee_opts
->x_target_flags
& ~always_inline_safe_mask
))
617 /* See if arch, tune, etc. are the same. */
618 else if (caller_opts
->arch
!= callee_opts
->arch
)
621 else if (!always_inline
&& caller_opts
->tune
!= callee_opts
->tune
)
624 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
625 /* If the calle doesn't use FP expressions differences in
626 ix86_fpmath can be ignored. We are called from FEs
627 for multi-versioning call optimization, so beware of
628 ipa_fn_summaries not available. */
629 && (! ipa_fn_summaries
630 || ipa_fn_summaries
->get (callee_node
) == NULL
631 || ipa_fn_summaries
->get (callee_node
)->fp_expressions
))
634 else if (!always_inline
635 && caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
644 /* Return true if this goes in large data/bss. */
647 ix86_in_large_data_p (tree exp
)
649 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
652 if (exp
== NULL_TREE
)
655 /* Functions are never large data. */
656 if (TREE_CODE (exp
) == FUNCTION_DECL
)
659 /* Automatic variables are never large data. */
660 if (VAR_P (exp
) && !is_global_var (exp
))
663 if (VAR_P (exp
) && DECL_SECTION_NAME (exp
))
665 const char *section
= DECL_SECTION_NAME (exp
);
666 if (strcmp (section
, ".ldata") == 0
667 || strcmp (section
, ".lbss") == 0)
673 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
675 /* If this is an incomplete type with size 0, then we can't put it
676 in data because it might be too big when completed. Also,
677 int_size_in_bytes returns -1 if size can vary or is larger than
678 an integer in which case also it is safer to assume that it goes in
680 if (size
<= 0 || size
> ix86_section_threshold
)
687 /* i386-specific section flag to mark large sections. */
688 #define SECTION_LARGE SECTION_MACH_DEP
690 /* Switch to the appropriate section for output of DECL.
691 DECL is either a `VAR_DECL' node or a constant of some sort.
692 RELOC indicates whether forming the initial value of DECL requires
693 link-time relocations. */
695 ATTRIBUTE_UNUSED
static section
*
696 x86_64_elf_select_section (tree decl
, int reloc
,
697 unsigned HOST_WIDE_INT align
)
699 if (ix86_in_large_data_p (decl
))
701 const char *sname
= NULL
;
702 unsigned int flags
= SECTION_WRITE
| SECTION_LARGE
;
703 switch (categorize_decl_for_section (decl
, reloc
))
708 case SECCAT_DATA_REL
:
709 sname
= ".ldata.rel";
711 case SECCAT_DATA_REL_LOCAL
:
712 sname
= ".ldata.rel.local";
714 case SECCAT_DATA_REL_RO
:
715 sname
= ".ldata.rel.ro";
717 case SECCAT_DATA_REL_RO_LOCAL
:
718 sname
= ".ldata.rel.ro.local";
722 flags
|= SECTION_BSS
;
725 case SECCAT_RODATA_MERGE_STR
:
726 case SECCAT_RODATA_MERGE_STR_INIT
:
727 case SECCAT_RODATA_MERGE_CONST
:
729 flags
&= ~SECTION_WRITE
;
738 /* We don't split these for medium model. Place them into
739 default sections and hope for best. */
744 /* We might get called with string constants, but get_named_section
745 doesn't like them as they are not DECLs. Also, we need to set
746 flags in that case. */
748 return get_section (sname
, flags
, NULL
);
749 return get_named_section (decl
, sname
, reloc
);
752 return default_elf_select_section (decl
, reloc
, align
);
755 /* Select a set of attributes for section NAME based on the properties
756 of DECL and whether or not RELOC indicates that DECL's initializer
757 might contain runtime relocations. */
759 static unsigned int ATTRIBUTE_UNUSED
760 x86_64_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
762 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
764 if (ix86_in_large_data_p (decl
))
765 flags
|= SECTION_LARGE
;
767 if (decl
== NULL_TREE
768 && (strcmp (name
, ".ldata.rel.ro") == 0
769 || strcmp (name
, ".ldata.rel.ro.local") == 0))
770 flags
|= SECTION_RELRO
;
772 if (strcmp (name
, ".lbss") == 0
773 || strncmp (name
, ".lbss.", 5) == 0
774 || strncmp (name
, ".gnu.linkonce.lb.", 16) == 0)
775 flags
|= SECTION_BSS
;
780 /* Build up a unique section name, expressed as a
781 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
782 RELOC indicates whether the initial value of EXP requires
783 link-time relocations. */
785 static void ATTRIBUTE_UNUSED
786 x86_64_elf_unique_section (tree decl
, int reloc
)
788 if (ix86_in_large_data_p (decl
))
790 const char *prefix
= NULL
;
791 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
792 bool one_only
= DECL_COMDAT_GROUP (decl
) && !HAVE_COMDAT_GROUP
;
794 switch (categorize_decl_for_section (decl
, reloc
))
797 case SECCAT_DATA_REL
:
798 case SECCAT_DATA_REL_LOCAL
:
799 case SECCAT_DATA_REL_RO
:
800 case SECCAT_DATA_REL_RO_LOCAL
:
801 prefix
= one_only
? ".ld" : ".ldata";
804 prefix
= one_only
? ".lb" : ".lbss";
807 case SECCAT_RODATA_MERGE_STR
:
808 case SECCAT_RODATA_MERGE_STR_INIT
:
809 case SECCAT_RODATA_MERGE_CONST
:
810 prefix
= one_only
? ".lr" : ".lrodata";
819 /* We don't split these for medium model. Place them into
820 default sections and hope for best. */
825 const char *name
, *linkonce
;
828 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
829 name
= targetm
.strip_name_encoding (name
);
831 /* If we're using one_only, then there needs to be a .gnu.linkonce
832 prefix to the section name. */
833 linkonce
= one_only
? ".gnu.linkonce" : "";
835 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
837 set_decl_section_name (decl
, string
);
841 default_unique_section (decl
, reloc
);
846 #ifndef LARGECOMM_SECTION_ASM_OP
847 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
850 /* This says how to output assembler code to declare an
851 uninitialized external linkage data object.
853 For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for
856 x86_elf_aligned_decl_common (FILE *file
, tree decl
,
857 const char *name
, unsigned HOST_WIDE_INT size
,
860 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
861 && size
> (unsigned int)ix86_section_threshold
)
863 switch_to_section (get_named_section (decl
, ".lbss", 0));
864 fputs (LARGECOMM_SECTION_ASM_OP
, file
);
867 fputs (COMMON_ASM_OP
, file
);
868 assemble_name (file
, name
);
869 fprintf (file
, "," HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
870 size
, align
/ BITS_PER_UNIT
);
874 /* Utility function for targets to use in implementing
875 ASM_OUTPUT_ALIGNED_BSS. */
878 x86_output_aligned_bss (FILE *file
, tree decl
, const char *name
,
879 unsigned HOST_WIDE_INT size
, int align
)
881 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
882 && size
> (unsigned int)ix86_section_threshold
)
883 switch_to_section (get_named_section (decl
, ".lbss", 0));
885 switch_to_section (bss_section
);
886 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
887 #ifdef ASM_DECLARE_OBJECT_NAME
888 last_assemble_variable_decl
= decl
;
889 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
891 /* Standard thing is just output label for the object. */
892 ASM_OUTPUT_LABEL (file
, name
);
893 #endif /* ASM_DECLARE_OBJECT_NAME */
894 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
897 /* Decide whether we must probe the stack before any space allocation
898 on this target. It's essentially TARGET_STACK_PROBE except when
899 -fstack-check causes the stack to be already probed differently. */
902 ix86_target_stack_probe (void)
904 /* Do not probe the stack twice if static stack checking is enabled. */
905 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
908 return TARGET_STACK_PROBE
;
911 /* Decide whether we can make a sibling call to a function. DECL is the
912 declaration of the function being targeted by the call and EXP is the
913 CALL_EXPR representing the call. */
916 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
918 tree type
, decl_or_type
;
920 bool bind_global
= decl
&& !targetm
.binds_local_p (decl
);
922 if (ix86_function_naked (current_function_decl
))
925 /* Sibling call isn't OK if there are no caller-saved registers
926 since all registers must be preserved before return. */
927 if (cfun
->machine
->no_caller_saved_registers
)
930 /* If we are generating position-independent code, we cannot sibcall
931 optimize direct calls to global functions, as the PLT requires
932 %ebx be live. (Darwin does not have a PLT.) */
940 /* If we need to align the outgoing stack, then sibcalling would
941 unalign the stack, which may break the called function. */
942 if (ix86_minimum_incoming_stack_boundary (true)
943 < PREFERRED_STACK_BOUNDARY
)
949 type
= TREE_TYPE (decl
);
953 /* We're looking at the CALL_EXPR, we need the type of the function. */
954 type
= CALL_EXPR_FN (exp
); /* pointer expression */
955 type
= TREE_TYPE (type
); /* pointer type */
956 type
= TREE_TYPE (type
); /* function type */
960 /* Check that the return value locations are the same. Like
961 if we are returning floats on the 80387 register stack, we cannot
962 make a sibcall from a function that doesn't return a float to a
963 function that does or, conversely, from a function that does return
964 a float to a function that doesn't; the necessary stack adjustment
965 would not be executed. This is also the place we notice
966 differences in the return value ABI. Note that it is ok for one
967 of the functions to have void return type as long as the return
968 value of the other is passed in a register. */
969 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
970 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
972 if (STACK_REG_P (a
) || STACK_REG_P (b
))
974 if (!rtx_equal_p (a
, b
))
977 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
979 else if (!rtx_equal_p (a
, b
))
984 /* The SYSV ABI has more call-clobbered registers;
985 disallow sibcalls from MS to SYSV. */
986 if (cfun
->machine
->call_abi
== MS_ABI
987 && ix86_function_type_abi (type
) == SYSV_ABI
)
992 /* If this call is indirect, we'll need to be able to use a
993 call-clobbered register for the address of the target function.
994 Make sure that all such registers are not used for passing
995 parameters. Note that DLLIMPORT functions and call to global
996 function via GOT slot are indirect. */
998 || (bind_global
&& flag_pic
&& !flag_plt
)
999 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
))
1000 || flag_force_indirect_call
)
1002 /* Check if regparm >= 3 since arg_reg_available is set to
1003 false if regparm == 0. If regparm is 1 or 2, there is
1004 always a call-clobbered register available.
1006 ??? The symbol indirect call doesn't need a call-clobbered
1007 register. But we don't know if this is a symbol indirect
1008 call or not here. */
1009 if (ix86_function_regparm (type
, decl
) >= 3
1010 && !cfun
->machine
->arg_reg_available
)
1015 /* Otherwise okay. That also includes certain types of indirect calls. */
1019 /* This function determines from TYPE the calling-convention. */
1022 ix86_get_callcvt (const_tree type
)
1024 unsigned int ret
= 0;
1029 return IX86_CALLCVT_CDECL
;
1031 attrs
= TYPE_ATTRIBUTES (type
);
1032 if (attrs
!= NULL_TREE
)
1034 if (lookup_attribute ("cdecl", attrs
))
1035 ret
|= IX86_CALLCVT_CDECL
;
1036 else if (lookup_attribute ("stdcall", attrs
))
1037 ret
|= IX86_CALLCVT_STDCALL
;
1038 else if (lookup_attribute ("fastcall", attrs
))
1039 ret
|= IX86_CALLCVT_FASTCALL
;
1040 else if (lookup_attribute ("thiscall", attrs
))
1041 ret
|= IX86_CALLCVT_THISCALL
;
1043 /* Regparam isn't allowed for thiscall and fastcall. */
1044 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
1046 if (lookup_attribute ("regparm", attrs
))
1047 ret
|= IX86_CALLCVT_REGPARM
;
1048 if (lookup_attribute ("sseregparm", attrs
))
1049 ret
|= IX86_CALLCVT_SSEREGPARM
;
1052 if (IX86_BASE_CALLCVT(ret
) != 0)
1056 is_stdarg
= stdarg_p (type
);
1057 if (TARGET_RTD
&& !is_stdarg
)
1058 return IX86_CALLCVT_STDCALL
| ret
;
1062 || TREE_CODE (type
) != METHOD_TYPE
1063 || ix86_function_type_abi (type
) != MS_ABI
)
1064 return IX86_CALLCVT_CDECL
| ret
;
1066 return IX86_CALLCVT_THISCALL
;
1069 /* Return 0 if the attributes for two types are incompatible, 1 if they
1070 are compatible, and 2 if they are nearly compatible (which causes a
1071 warning to be generated). */
1074 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
1076 unsigned int ccvt1
, ccvt2
;
1078 if (TREE_CODE (type1
) != FUNCTION_TYPE
1079 && TREE_CODE (type1
) != METHOD_TYPE
)
1082 ccvt1
= ix86_get_callcvt (type1
);
1083 ccvt2
= ix86_get_callcvt (type2
);
1086 if (ix86_function_regparm (type1
, NULL
)
1087 != ix86_function_regparm (type2
, NULL
))
1093 /* Return the regparm value for a function with the indicated TYPE and DECL.
1094 DECL may be NULL when calling function indirectly
1095 or considering a libcall. */
1098 ix86_function_regparm (const_tree type
, const_tree decl
)
1105 return (ix86_function_type_abi (type
) == SYSV_ABI
1106 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
1107 ccvt
= ix86_get_callcvt (type
);
1108 regparm
= ix86_regparm
;
1110 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
1112 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
1115 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1119 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
1121 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
1124 /* Use register calling convention for local functions when possible. */
1126 && TREE_CODE (decl
) == FUNCTION_DECL
)
1128 cgraph_node
*target
= cgraph_node::get (decl
);
1130 target
= target
->function_symbol ();
1132 /* Caller and callee must agree on the calling convention, so
1133 checking here just optimize means that with
1134 __attribute__((optimize (...))) caller could use regparm convention
1135 and callee not, or vice versa. Instead look at whether the callee
1136 is optimized or not. */
1137 if (target
&& opt_for_fn (target
->decl
, optimize
)
1138 && !(profile_flag
&& !flag_fentry
))
1140 cgraph_local_info
*i
= &target
->local
;
1141 if (i
&& i
->local
&& i
->can_change_signature
)
1143 int local_regparm
, globals
= 0, regno
;
1145 /* Make sure no regparm register is taken by a
1146 fixed register variable. */
1147 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
;
1149 if (fixed_regs
[local_regparm
])
1152 /* We don't want to use regparm(3) for nested functions as
1153 these use a static chain pointer in the third argument. */
1154 if (local_regparm
== 3 && DECL_STATIC_CHAIN (target
->decl
))
1157 /* Save a register for the split stack. */
1158 if (flag_split_stack
)
1160 if (local_regparm
== 3)
1162 else if (local_regparm
== 2
1163 && DECL_STATIC_CHAIN (target
->decl
))
1167 /* Each fixed register usage increases register pressure,
1168 so less registers should be used for argument passing.
1169 This functionality can be overriden by an explicit
1171 for (regno
= AX_REG
; regno
<= DI_REG
; regno
++)
1172 if (fixed_regs
[regno
])
1176 = globals
< local_regparm
? local_regparm
- globals
: 0;
1178 if (local_regparm
> regparm
)
1179 regparm
= local_regparm
;
1187 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
1188 DFmode (2) arguments in SSE registers for a function with the
1189 indicated TYPE and DECL. DECL may be NULL when calling function
1190 indirectly or considering a libcall. Return -1 if any FP parameter
1191 should be rejected by error. This is used in siutation we imply SSE
1192 calling convetion but the function is called from another function with
1193 SSE disabled. Otherwise return 0. */
1196 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
1198 gcc_assert (!TARGET_64BIT
);
1200 /* Use SSE registers to pass SFmode and DFmode arguments if requested
1201 by the sseregparm attribute. */
1202 if (TARGET_SSEREGPARM
1203 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
1210 error ("calling %qD with attribute sseregparm without "
1211 "SSE/SSE2 enabled", decl
);
1213 error ("calling %qT with attribute sseregparm without "
1214 "SSE/SSE2 enabled", type
);
1225 cgraph_node
*target
= cgraph_node::get (decl
);
1227 target
= target
->function_symbol ();
1229 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1230 (and DFmode for SSE2) arguments in SSE registers. */
1232 /* TARGET_SSE_MATH */
1233 && (target_opts_for_fn (target
->decl
)->x_ix86_fpmath
& FPMATH_SSE
)
1234 && opt_for_fn (target
->decl
, optimize
)
1235 && !(profile_flag
&& !flag_fentry
))
1237 cgraph_local_info
*i
= &target
->local
;
1238 if (i
&& i
->local
&& i
->can_change_signature
)
1240 /* Refuse to produce wrong code when local function with SSE enabled
1241 is called from SSE disabled function.
1242 FIXME: We need a way to detect these cases cross-ltrans partition
1243 and avoid using SSE calling conventions on local functions called
1244 from function with SSE disabled. For now at least delay the
1245 warning until we know we are going to produce wrong code.
1247 if (!TARGET_SSE
&& warn
)
1249 return TARGET_SSE2_P (target_opts_for_fn (target
->decl
)
1250 ->x_ix86_isa_flags
) ? 2 : 1;
1257 /* Return true if EAX is live at the start of the function. Used by
1258 ix86_expand_prologue to determine if we need special help before
1259 calling allocate_stack_worker. */
1262 ix86_eax_live_at_start_p (void)
1264 /* Cheat. Don't bother working forward from ix86_function_regparm
1265 to the function type to whether an actual argument is located in
1266 eax. Instead just look at cfg info, which is still close enough
1267 to correct at this point. This gives false positives for broken
1268 functions that might use uninitialized data that happens to be
1269 allocated in eax, but who cares? */
1270 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 0);
1274 ix86_keep_aggregate_return_pointer (tree fntype
)
1280 attr
= lookup_attribute ("callee_pop_aggregate_return",
1281 TYPE_ATTRIBUTES (fntype
));
1283 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
1285 /* For 32-bit MS-ABI the default is to keep aggregate
1287 if (ix86_function_type_abi (fntype
) == MS_ABI
)
1290 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
1293 /* Value is the number of bytes of arguments automatically
1294 popped when returning from a subroutine call.
1295 FUNDECL is the declaration node of the function (as a tree),
1296 FUNTYPE is the data type of the function (as a tree),
1297 or for a library call it is an identifier node for the subroutine name.
1298 SIZE is the number of bytes of arguments passed on the stack.
1300 On the 80386, the RTD insn may be used to pop them if the number
1301 of args is fixed, but if the number is variable then the caller
1302 must pop them all. RTD can't be used for library calls now
1303 because the library is compiled with the Unix compiler.
1304 Use of RTD is a selectable option, since it is incompatible with
1305 standard Unix calling sequences. If the option is not selected,
1306 the caller must always pop the args.
1308 The attribute stdcall is equivalent to RTD on a per module basis. */
1311 ix86_return_pops_args (tree fundecl
, tree funtype
, poly_int64 size
)
1315 /* None of the 64-bit ABIs pop arguments. */
1319 ccvt
= ix86_get_callcvt (funtype
);
1321 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
1322 | IX86_CALLCVT_THISCALL
)) != 0
1323 && ! stdarg_p (funtype
))
1326 /* Lose any fake structure return argument if it is passed on the stack. */
1327 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
1328 && !ix86_keep_aggregate_return_pointer (funtype
))
1330 int nregs
= ix86_function_regparm (funtype
, fundecl
);
1332 return GET_MODE_SIZE (Pmode
);
1338 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1341 ix86_legitimate_combined_insn (rtx_insn
*insn
)
1345 /* Check operand constraints in case hard registers were propagated
1346 into insn pattern. This check prevents combine pass from
1347 generating insn patterns with invalid hard register operands.
1348 These invalid insns can eventually confuse reload to error out
1349 with a spill failure. See also PRs 46829 and 46843. */
1351 gcc_assert (INSN_CODE (insn
) >= 0);
1353 extract_insn (insn
);
1354 preprocess_constraints (insn
);
1356 int n_operands
= recog_data
.n_operands
;
1357 int n_alternatives
= recog_data
.n_alternatives
;
1358 for (i
= 0; i
< n_operands
; i
++)
1360 rtx op
= recog_data
.operand
[i
];
1361 machine_mode mode
= GET_MODE (op
);
1362 const operand_alternative
*op_alt
;
1367 /* A unary operator may be accepted by the predicate, but it
1368 is irrelevant for matching constraints. */
1374 if (REG_P (SUBREG_REG (op
))
1375 && REGNO (SUBREG_REG (op
)) < FIRST_PSEUDO_REGISTER
)
1376 offset
= subreg_regno_offset (REGNO (SUBREG_REG (op
)),
1377 GET_MODE (SUBREG_REG (op
)),
1380 op
= SUBREG_REG (op
);
1383 if (!(REG_P (op
) && HARD_REGISTER_P (op
)))
1386 op_alt
= recog_op_alt
;
1388 /* Operand has no constraints, anything is OK. */
1389 win
= !n_alternatives
;
1391 alternative_mask preferred
= get_preferred_alternatives (insn
);
1392 for (j
= 0; j
< n_alternatives
; j
++, op_alt
+= n_operands
)
1394 if (!TEST_BIT (preferred
, j
))
1396 if (op_alt
[i
].anything_ok
1397 || (op_alt
[i
].matches
!= -1
1399 (recog_data
.operand
[i
],
1400 recog_data
.operand
[op_alt
[i
].matches
]))
1401 || reg_fits_class_p (op
, op_alt
[i
].cl
, offset
, mode
))
1415 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1417 static unsigned HOST_WIDE_INT
1418 ix86_asan_shadow_offset (void)
1420 return TARGET_LP64
? (TARGET_MACHO
? (HOST_WIDE_INT_1
<< 44)
1421 : HOST_WIDE_INT_C (0x7fff8000))
1422 : (HOST_WIDE_INT_1
<< 29);
1425 /* Argument support functions. */
1427 /* Return true when register may be used to pass function parameters. */
1429 ix86_function_arg_regno_p (int regno
)
1432 enum calling_abi call_abi
;
1433 const int *parm_regs
;
1438 return (regno
< REGPARM_MAX
1439 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
1441 return (regno
< REGPARM_MAX
1442 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
1443 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
1444 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
1445 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
1448 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
1449 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
1452 /* TODO: The function should depend on current function ABI but
1453 builtins.c would need updating then. Therefore we use the
1455 call_abi
= ix86_cfun_abi ();
1457 /* RAX is used as hidden argument to va_arg functions. */
1458 if (call_abi
== SYSV_ABI
&& regno
== AX_REG
)
1461 if (call_abi
== MS_ABI
)
1462 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
1464 parm_regs
= x86_64_int_parameter_registers
;
1466 for (i
= 0; i
< (call_abi
== MS_ABI
1467 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
1468 if (regno
== parm_regs
[i
])
1473 /* Return if we do not know how to pass TYPE solely in registers. */
1476 ix86_must_pass_in_stack (machine_mode mode
, const_tree type
)
1478 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
1481 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1482 The layout_type routine is crafty and tries to trick us into passing
1483 currently unsupported vector types on the stack by using TImode. */
1484 return (!TARGET_64BIT
&& mode
== TImode
1485 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
1488 /* It returns the size, in bytes, of the area reserved for arguments passed
1489 in registers for the function represented by fndecl dependent to the used
1492 ix86_reg_parm_stack_space (const_tree fndecl
)
1494 enum calling_abi call_abi
= SYSV_ABI
;
1495 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
1496 call_abi
= ix86_function_abi (fndecl
);
1498 call_abi
= ix86_function_type_abi (fndecl
);
1499 if (TARGET_64BIT
&& call_abi
== MS_ABI
)
1504 /* We add this as a workaround in order to use libc_has_function
1507 ix86_libc_has_function (enum function_class fn_class
)
1509 return targetm
.libc_has_function (fn_class
);
1512 /* Returns value SYSV_ABI, MS_ABI dependent on fntype,
1513 specifying the call abi used. */
1515 ix86_function_type_abi (const_tree fntype
)
1517 enum calling_abi abi
= ix86_abi
;
1519 if (fntype
== NULL_TREE
|| TYPE_ATTRIBUTES (fntype
) == NULL_TREE
)
1523 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
1526 if (TARGET_X32
&& !warned
)
1528 error ("X32 does not support %<ms_abi%> attribute");
1534 else if (abi
== MS_ABI
1535 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
1542 ix86_function_abi (const_tree fndecl
)
1544 return fndecl
? ix86_function_type_abi (TREE_TYPE (fndecl
)) : ix86_abi
;
1547 /* Returns value SYSV_ABI, MS_ABI dependent on cfun,
1548 specifying the call abi used. */
1550 ix86_cfun_abi (void)
1552 return cfun
? cfun
->machine
->call_abi
: ix86_abi
;
1556 ix86_function_ms_hook_prologue (const_tree fn
)
1558 if (fn
&& lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn
)))
1560 if (decl_function_context (fn
) != NULL_TREE
)
1561 error_at (DECL_SOURCE_LOCATION (fn
),
1562 "%<ms_hook_prologue%> attribute is not compatible "
1563 "with nested function");
1571 ix86_function_naked (const_tree fn
)
1573 if (fn
&& lookup_attribute ("naked", DECL_ATTRIBUTES (fn
)))
1579 /* Write the extra assembler code needed to declare a function properly. */
1582 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
1585 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
1589 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
1590 unsigned int filler_cc
= 0xcccccccc;
1592 for (i
= 0; i
< filler_count
; i
+= 4)
1593 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
1596 #ifdef SUBTARGET_ASM_UNWIND_INIT
1597 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
1600 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
1602 /* Output magic byte marker, if hot-patch attribute is set. */
1607 /* leaq [%rsp + 0], %rsp */
1608 fputs (ASM_BYTE
"0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
1613 /* movl.s %edi, %edi
1615 movl.s %esp, %ebp */
1616 fputs (ASM_BYTE
"0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file
);
1621 /* Implementation of call abi switching target hook. Specific to FNDECL
1622 the specific call register sets are set. See also
1623 ix86_conditional_register_usage for more details. */
1625 ix86_call_abi_override (const_tree fndecl
)
1627 cfun
->machine
->call_abi
= ix86_function_abi (fndecl
);
1630 /* Return 1 if pseudo register should be created and used to hold
1631 GOT address for PIC code. */
1633 ix86_use_pseudo_pic_reg (void)
1636 && (ix86_cmodel
== CM_SMALL_PIC
1643 /* Initialize large model PIC register. */
1646 ix86_init_large_pic_reg (unsigned int tmp_regno
)
1648 rtx_code_label
*label
;
1651 gcc_assert (Pmode
== DImode
);
1652 label
= gen_label_rtx ();
1654 LABEL_PRESERVE_P (label
) = 1;
1655 tmp_reg
= gen_rtx_REG (Pmode
, tmp_regno
);
1656 gcc_assert (REGNO (pic_offset_table_rtx
) != tmp_regno
);
1657 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
,
1659 emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
1660 emit_insn (ix86_gen_add3 (pic_offset_table_rtx
,
1661 pic_offset_table_rtx
, tmp_reg
));
1662 const char *name
= LABEL_NAME (label
);
1663 PUT_CODE (label
, NOTE
);
1664 NOTE_KIND (label
) = NOTE_INSN_DELETED_LABEL
;
1665 NOTE_DELETED_LABEL_NAME (label
) = name
;
1668 /* Create and initialize PIC register if required. */
1670 ix86_init_pic_reg (void)
1675 if (!ix86_use_pseudo_pic_reg ())
1682 if (ix86_cmodel
== CM_LARGE_PIC
)
1683 ix86_init_large_pic_reg (R11_REG
);
1685 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
1689 /* If there is future mcount call in the function it is more profitable
1690 to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */
1691 rtx reg
= crtl
->profile
1692 ? gen_rtx_REG (Pmode
, REAL_PIC_OFFSET_TABLE_REGNUM
)
1693 : pic_offset_table_rtx
;
1694 rtx_insn
*insn
= emit_insn (gen_set_got (reg
));
1695 RTX_FRAME_RELATED_P (insn
) = 1;
1697 emit_move_insn (pic_offset_table_rtx
, reg
);
1698 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
1704 entry_edge
= single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
1705 insert_insn_on_edge (seq
, entry_edge
);
1706 commit_one_edge_insertion (entry_edge
);
1709 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1710 for a call to a function whose data type is FNTYPE.
1711 For a library call, FNTYPE is 0. */
1714 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
1715 tree fntype
, /* tree ptr for function decl */
1716 rtx libname
, /* SYMBOL_REF of library name or 0 */
1720 struct cgraph_local_info
*i
= NULL
;
1721 struct cgraph_node
*target
= NULL
;
1723 memset (cum
, 0, sizeof (*cum
));
1727 target
= cgraph_node::get (fndecl
);
1730 target
= target
->function_symbol ();
1731 i
= cgraph_node::local_info (target
->decl
);
1732 cum
->call_abi
= ix86_function_abi (target
->decl
);
1735 cum
->call_abi
= ix86_function_abi (fndecl
);
1738 cum
->call_abi
= ix86_function_type_abi (fntype
);
1740 cum
->caller
= caller
;
1742 /* Set up the number of registers to use for passing arguments. */
1743 cum
->nregs
= ix86_regparm
;
1746 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
1747 ? X86_64_REGPARM_MAX
1748 : X86_64_MS_REGPARM_MAX
);
1752 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1755 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
1756 ? X86_64_SSE_REGPARM_MAX
1757 : X86_64_MS_SSE_REGPARM_MAX
);
1761 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
1762 cum
->warn_avx512f
= true;
1763 cum
->warn_avx
= true;
1764 cum
->warn_sse
= true;
1765 cum
->warn_mmx
= true;
1767 /* Because type might mismatch in between caller and callee, we need to
1768 use actual type of function for local calls.
1769 FIXME: cgraph_analyze can be told to actually record if function uses
1770 va_start so for local functions maybe_vaarg can be made aggressive
1772 FIXME: once typesytem is fixed, we won't need this code anymore. */
1773 if (i
&& i
->local
&& i
->can_change_signature
)
1774 fntype
= TREE_TYPE (target
->decl
);
1775 cum
->stdarg
= stdarg_p (fntype
);
1776 cum
->maybe_vaarg
= (fntype
1777 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
1782 cum
->warn_empty
= !warn_abi
|| cum
->stdarg
;
1783 if (!cum
->warn_empty
&& fntype
)
1785 function_args_iterator iter
;
1787 bool seen_empty_type
= false;
1788 FOREACH_FUNCTION_ARGS (fntype
, argtype
, iter
)
1790 if (argtype
== error_mark_node
|| VOID_TYPE_P (argtype
))
1792 if (TYPE_EMPTY_P (argtype
))
1793 seen_empty_type
= true;
1794 else if (seen_empty_type
)
1796 cum
->warn_empty
= true;
1804 /* If there are variable arguments, then we won't pass anything
1805 in registers in 32-bit mode. */
1806 if (stdarg_p (fntype
))
1809 /* Since in 32-bit, variable arguments are always passed on
1810 stack, there is scratch register available for indirect
1812 cfun
->machine
->arg_reg_available
= true;
1815 cum
->warn_avx512f
= false;
1816 cum
->warn_avx
= false;
1817 cum
->warn_sse
= false;
1818 cum
->warn_mmx
= false;
1822 /* Use ecx and edx registers if function has fastcall attribute,
1823 else look for regparm information. */
1826 unsigned int ccvt
= ix86_get_callcvt (fntype
);
1827 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
1830 cum
->fastcall
= 1; /* Same first register as in fastcall. */
1832 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
1838 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
1841 /* Set up the number of SSE registers used for passing SFmode
1842 and DFmode arguments. Warn for mismatching ABI. */
1843 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
1846 cfun
->machine
->arg_reg_available
= (cum
->nregs
> 0);
1849 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
1850 But in the case of vector types, it is some vector mode.
1852 When we have only some of our vector isa extensions enabled, then there
1853 are some modes for which vector_mode_supported_p is false. For these
1854 modes, the generic vector support in gcc will choose some non-vector mode
1855 in order to implement the type. By computing the natural mode, we'll
1856 select the proper ABI location for the operand and not depend on whatever
1857 the middle-end decides to do with these vector types.
1859 The midde-end can't deal with the vector types > 16 bytes. In this
1860 case, we return the original mode and warn ABI change if CUM isn't
1863 If INT_RETURN is true, warn ABI change if the vector mode isn't
1864 available for function return value. */
1867 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
,
1870 machine_mode mode
= TYPE_MODE (type
);
1872 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
1874 HOST_WIDE_INT size
= int_size_in_bytes (type
);
1875 if ((size
== 8 || size
== 16 || size
== 32 || size
== 64)
1876 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
1877 && TYPE_VECTOR_SUBPARTS (type
) > 1)
1879 machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
1881 /* There are no XFmode vector modes. */
1882 if (innermode
== XFmode
)
1885 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
1886 mode
= MIN_MODE_VECTOR_FLOAT
;
1888 mode
= MIN_MODE_VECTOR_INT
;
1890 /* Get the mode which has this inner mode and number of units. */
1891 FOR_EACH_MODE_FROM (mode
, mode
)
1892 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
1893 && GET_MODE_INNER (mode
) == innermode
)
1895 if (size
== 64 && !TARGET_AVX512F
&& !TARGET_IAMCU
)
1897 static bool warnedavx512f
;
1898 static bool warnedavx512f_ret
;
1900 if (cum
&& cum
->warn_avx512f
&& !warnedavx512f
)
1902 if (warning (OPT_Wpsabi
, "AVX512F vector argument "
1903 "without AVX512F enabled changes the ABI"))
1904 warnedavx512f
= true;
1906 else if (in_return
&& !warnedavx512f_ret
)
1908 if (warning (OPT_Wpsabi
, "AVX512F vector return "
1909 "without AVX512F enabled changes the ABI"))
1910 warnedavx512f_ret
= true;
1913 return TYPE_MODE (type
);
1915 else if (size
== 32 && !TARGET_AVX
&& !TARGET_IAMCU
)
1917 static bool warnedavx
;
1918 static bool warnedavx_ret
;
1920 if (cum
&& cum
->warn_avx
&& !warnedavx
)
1922 if (warning (OPT_Wpsabi
, "AVX vector argument "
1923 "without AVX enabled changes the ABI"))
1926 else if (in_return
&& !warnedavx_ret
)
1928 if (warning (OPT_Wpsabi
, "AVX vector return "
1929 "without AVX enabled changes the ABI"))
1930 warnedavx_ret
= true;
1933 return TYPE_MODE (type
);
1935 else if (((size
== 8 && TARGET_64BIT
) || size
== 16)
1939 static bool warnedsse
;
1940 static bool warnedsse_ret
;
1942 if (cum
&& cum
->warn_sse
&& !warnedsse
)
1944 if (warning (OPT_Wpsabi
, "SSE vector argument "
1945 "without SSE enabled changes the ABI"))
1948 else if (!TARGET_64BIT
&& in_return
&& !warnedsse_ret
)
1950 if (warning (OPT_Wpsabi
, "SSE vector return "
1951 "without SSE enabled changes the ABI"))
1952 warnedsse_ret
= true;
1955 else if ((size
== 8 && !TARGET_64BIT
)
1957 || cfun
->machine
->func_type
== TYPE_NORMAL
)
1961 static bool warnedmmx
;
1962 static bool warnedmmx_ret
;
1964 if (cum
&& cum
->warn_mmx
&& !warnedmmx
)
1966 if (warning (OPT_Wpsabi
, "MMX vector argument "
1967 "without MMX enabled changes the ABI"))
1970 else if (in_return
&& !warnedmmx_ret
)
1972 if (warning (OPT_Wpsabi
, "MMX vector return "
1973 "without MMX enabled changes the ABI"))
1974 warnedmmx_ret
= true;
1987 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
1988 this may not agree with the mode that the type system has chosen for the
1989 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
1990 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
1993 gen_reg_or_parallel (machine_mode mode
, machine_mode orig_mode
,
1998 if (orig_mode
!= BLKmode
)
1999 tmp
= gen_rtx_REG (orig_mode
, regno
);
2002 tmp
= gen_rtx_REG (mode
, regno
);
2003 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
2004 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
2010 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2011 of this code is to classify each 8bytes of incoming argument by the register
2012 class and assign registers accordingly. */
2014 /* Return the union class of CLASS1 and CLASS2.
2015 See the x86-64 PS ABI for details. */
2017 static enum x86_64_reg_class
2018 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
2020 /* Rule #1: If both classes are equal, this is the resulting class. */
2021 if (class1
== class2
)
2024 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2026 if (class1
== X86_64_NO_CLASS
)
2028 if (class2
== X86_64_NO_CLASS
)
2031 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2032 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
2033 return X86_64_MEMORY_CLASS
;
2035 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2036 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
2037 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
2038 return X86_64_INTEGERSI_CLASS
;
2039 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
2040 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
2041 return X86_64_INTEGER_CLASS
;
2043 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2045 if (class1
== X86_64_X87_CLASS
2046 || class1
== X86_64_X87UP_CLASS
2047 || class1
== X86_64_COMPLEX_X87_CLASS
2048 || class2
== X86_64_X87_CLASS
2049 || class2
== X86_64_X87UP_CLASS
2050 || class2
== X86_64_COMPLEX_X87_CLASS
)
2051 return X86_64_MEMORY_CLASS
;
2053 /* Rule #6: Otherwise class SSE is used. */
2054 return X86_64_SSE_CLASS
;
2057 /* Classify the argument of type TYPE and mode MODE.
2058 CLASSES will be filled by the register class used to pass each word
2059 of the operand. The number of words is returned. In case the parameter
2060 should be passed in memory, 0 is returned. As a special case for zero
2061 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2063 BIT_OFFSET is used internally for handling records and specifies offset
2064 of the offset in bits modulo 512 to avoid overflow cases.
2066 See the x86-64 PS ABI for details.
2070 classify_argument (machine_mode mode
, const_tree type
,
2071 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
2074 = mode
== BLKmode
? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2075 int words
= CEIL (bytes
+ (bit_offset
% 64) / 8, UNITS_PER_WORD
);
2077 /* Variable sized entities are always passed/returned in memory. */
2081 if (mode
!= VOIDmode
2082 && targetm
.calls
.must_pass_in_stack (mode
, type
))
2085 if (type
&& AGGREGATE_TYPE_P (type
))
2089 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
2091 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2095 for (i
= 0; i
< words
; i
++)
2096 classes
[i
] = X86_64_NO_CLASS
;
2098 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2099 signalize memory class, so handle it as special case. */
2102 classes
[0] = X86_64_NO_CLASS
;
2106 /* Classify each field of record and merge classes. */
2107 switch (TREE_CODE (type
))
2110 /* And now merge the fields of structure. */
2111 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
2113 if (TREE_CODE (field
) == FIELD_DECL
)
2117 if (TREE_TYPE (field
) == error_mark_node
)
2120 /* Bitfields are always classified as integer. Handle them
2121 early, since later code would consider them to be
2122 misaligned integers. */
2123 if (DECL_BIT_FIELD (field
))
2125 for (i
= (int_bit_position (field
)
2126 + (bit_offset
% 64)) / 8 / 8;
2127 i
< ((int_bit_position (field
) + (bit_offset
% 64))
2128 + tree_to_shwi (DECL_SIZE (field
))
2131 = merge_classes (X86_64_INTEGER_CLASS
, classes
[i
]);
2137 type
= TREE_TYPE (field
);
2139 /* Flexible array member is ignored. */
2140 if (TYPE_MODE (type
) == BLKmode
2141 && TREE_CODE (type
) == ARRAY_TYPE
2142 && TYPE_SIZE (type
) == NULL_TREE
2143 && TYPE_DOMAIN (type
) != NULL_TREE
2144 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
2149 if (!warned
&& warn_psabi
)
2152 inform (input_location
,
2153 "the ABI of passing struct with"
2154 " a flexible array member has"
2155 " changed in GCC 4.4");
2159 num
= classify_argument (TYPE_MODE (type
), type
,
2161 (int_bit_position (field
)
2162 + bit_offset
) % 512);
2165 pos
= (int_bit_position (field
)
2166 + (bit_offset
% 64)) / 8 / 8;
2167 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
2169 = merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2176 /* Arrays are handled as small records. */
2179 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
2180 TREE_TYPE (type
), subclasses
, bit_offset
);
2184 /* The partial classes are now full classes. */
2185 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
2186 subclasses
[0] = X86_64_SSE_CLASS
;
2187 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
2188 && !((bit_offset
% 64) == 0 && bytes
== 4))
2189 subclasses
[0] = X86_64_INTEGER_CLASS
;
2191 for (i
= 0; i
< words
; i
++)
2192 classes
[i
] = subclasses
[i
% num
];
2197 case QUAL_UNION_TYPE
:
2198 /* Unions are similar to RECORD_TYPE but offset is always 0.
2200 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
2202 if (TREE_CODE (field
) == FIELD_DECL
)
2206 if (TREE_TYPE (field
) == error_mark_node
)
2209 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2210 TREE_TYPE (field
), subclasses
,
2214 for (i
= 0; i
< num
&& i
< words
; i
++)
2215 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
2226 /* When size > 16 bytes, if the first one isn't
2227 X86_64_SSE_CLASS or any other ones aren't
2228 X86_64_SSEUP_CLASS, everything should be passed in
2230 if (classes
[0] != X86_64_SSE_CLASS
)
2233 for (i
= 1; i
< words
; i
++)
2234 if (classes
[i
] != X86_64_SSEUP_CLASS
)
2238 /* Final merger cleanup. */
2239 for (i
= 0; i
< words
; i
++)
2241 /* If one class is MEMORY, everything should be passed in
2243 if (classes
[i
] == X86_64_MEMORY_CLASS
)
2246 /* The X86_64_SSEUP_CLASS should be always preceded by
2247 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2248 if (classes
[i
] == X86_64_SSEUP_CLASS
2249 && classes
[i
- 1] != X86_64_SSE_CLASS
2250 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
2252 /* The first one should never be X86_64_SSEUP_CLASS. */
2253 gcc_assert (i
!= 0);
2254 classes
[i
] = X86_64_SSE_CLASS
;
2257 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2258 everything should be passed in memory. */
2259 if (classes
[i
] == X86_64_X87UP_CLASS
2260 && (classes
[i
- 1] != X86_64_X87_CLASS
))
2264 /* The first one should never be X86_64_X87UP_CLASS. */
2265 gcc_assert (i
!= 0);
2266 if (!warned
&& warn_psabi
)
2269 inform (input_location
,
2270 "the ABI of passing union with %<long double%>"
2271 " has changed in GCC 4.4");
2279 /* Compute alignment needed. We align all types to natural boundaries with
2280 exception of XFmode that is aligned to 64bits. */
2281 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
2283 int mode_alignment
= GET_MODE_BITSIZE (mode
);
2286 mode_alignment
= 128;
2287 else if (mode
== XCmode
)
2288 mode_alignment
= 256;
2289 if (COMPLEX_MODE_P (mode
))
2290 mode_alignment
/= 2;
2291 /* Misaligned fields are always returned in memory. */
2292 if (bit_offset
% mode_alignment
)
2296 /* for V1xx modes, just use the base mode */
2297 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
2298 && GET_MODE_UNIT_SIZE (mode
) == bytes
)
2299 mode
= GET_MODE_INNER (mode
);
2301 /* Classification of atomic types. */
2306 classes
[0] = X86_64_SSE_CLASS
;
2309 classes
[0] = X86_64_SSE_CLASS
;
2310 classes
[1] = X86_64_SSEUP_CLASS
;
2320 int size
= bit_offset
+ (int) GET_MODE_BITSIZE (mode
);
2322 /* Analyze last 128 bits only. */
2323 size
= (size
- 1) & 0x7f;
2327 classes
[0] = X86_64_INTEGERSI_CLASS
;
2332 classes
[0] = X86_64_INTEGER_CLASS
;
2335 else if (size
< 64+32)
2337 classes
[0] = X86_64_INTEGER_CLASS
;
2338 classes
[1] = X86_64_INTEGERSI_CLASS
;
2341 else if (size
< 64+64)
2343 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2351 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2355 /* OImode shouldn't be used directly. */
2360 if (!(bit_offset
% 64))
2361 classes
[0] = X86_64_SSESF_CLASS
;
2363 classes
[0] = X86_64_SSE_CLASS
;
2366 classes
[0] = X86_64_SSEDF_CLASS
;
2369 classes
[0] = X86_64_X87_CLASS
;
2370 classes
[1] = X86_64_X87UP_CLASS
;
2373 classes
[0] = X86_64_SSE_CLASS
;
2374 classes
[1] = X86_64_SSEUP_CLASS
;
2377 classes
[0] = X86_64_SSE_CLASS
;
2378 if (!(bit_offset
% 64))
2384 if (!warned
&& warn_psabi
)
2387 inform (input_location
,
2388 "the ABI of passing structure with %<complex float%>"
2389 " member has changed in GCC 4.4");
2391 classes
[1] = X86_64_SSESF_CLASS
;
2395 classes
[0] = X86_64_SSEDF_CLASS
;
2396 classes
[1] = X86_64_SSEDF_CLASS
;
2399 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
2402 /* This modes is larger than 16 bytes. */
2410 classes
[0] = X86_64_SSE_CLASS
;
2411 classes
[1] = X86_64_SSEUP_CLASS
;
2412 classes
[2] = X86_64_SSEUP_CLASS
;
2413 classes
[3] = X86_64_SSEUP_CLASS
;
2421 classes
[0] = X86_64_SSE_CLASS
;
2422 classes
[1] = X86_64_SSEUP_CLASS
;
2423 classes
[2] = X86_64_SSEUP_CLASS
;
2424 classes
[3] = X86_64_SSEUP_CLASS
;
2425 classes
[4] = X86_64_SSEUP_CLASS
;
2426 classes
[5] = X86_64_SSEUP_CLASS
;
2427 classes
[6] = X86_64_SSEUP_CLASS
;
2428 classes
[7] = X86_64_SSEUP_CLASS
;
2436 classes
[0] = X86_64_SSE_CLASS
;
2437 classes
[1] = X86_64_SSEUP_CLASS
;
2445 classes
[0] = X86_64_SSE_CLASS
;
2451 gcc_assert (VECTOR_MODE_P (mode
));
2456 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
2458 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2459 classes
[0] = X86_64_INTEGERSI_CLASS
;
2461 classes
[0] = X86_64_INTEGER_CLASS
;
2462 classes
[1] = X86_64_INTEGER_CLASS
;
2463 return 1 + (bytes
> 8);
2467 /* Examine the argument and return set number of register required in each
2468 class. Return true iff parameter should be passed in memory. */
2471 examine_argument (machine_mode mode
, const_tree type
, int in_return
,
2472 int *int_nregs
, int *sse_nregs
)
2474 enum x86_64_reg_class regclass
[MAX_CLASSES
];
2475 int n
= classify_argument (mode
, type
, regclass
, 0);
2482 for (n
--; n
>= 0; n
--)
2483 switch (regclass
[n
])
2485 case X86_64_INTEGER_CLASS
:
2486 case X86_64_INTEGERSI_CLASS
:
2489 case X86_64_SSE_CLASS
:
2490 case X86_64_SSESF_CLASS
:
2491 case X86_64_SSEDF_CLASS
:
2494 case X86_64_NO_CLASS
:
2495 case X86_64_SSEUP_CLASS
:
2497 case X86_64_X87_CLASS
:
2498 case X86_64_X87UP_CLASS
:
2499 case X86_64_COMPLEX_X87_CLASS
:
2503 case X86_64_MEMORY_CLASS
:
2510 /* Construct container for the argument used by GCC interface. See
2511 FUNCTION_ARG for the detailed description. */
2514 construct_container (machine_mode mode
, machine_mode orig_mode
,
2515 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
2516 const int *intreg
, int sse_regno
)
2518 /* The following variables hold the static issued_error state. */
2519 static bool issued_sse_arg_error
;
2520 static bool issued_sse_ret_error
;
2521 static bool issued_x87_ret_error
;
2523 machine_mode tmpmode
;
2525 = mode
== BLKmode
? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2526 enum x86_64_reg_class regclass
[MAX_CLASSES
];
2530 int needed_sseregs
, needed_intregs
;
2531 rtx exp
[MAX_CLASSES
];
2534 n
= classify_argument (mode
, type
, regclass
, 0);
2537 if (examine_argument (mode
, type
, in_return
, &needed_intregs
,
2540 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
2543 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2544 some less clueful developer tries to use floating-point anyway. */
2545 if (needed_sseregs
&& !TARGET_SSE
)
2549 if (!issued_sse_ret_error
)
2551 error ("SSE register return with SSE disabled");
2552 issued_sse_ret_error
= true;
2555 else if (!issued_sse_arg_error
)
2557 error ("SSE register argument with SSE disabled");
2558 issued_sse_arg_error
= true;
2563 /* Likewise, error if the ABI requires us to return values in the
2564 x87 registers and the user specified -mno-80387. */
2565 if (!TARGET_FLOAT_RETURNS_IN_80387
&& in_return
)
2566 for (i
= 0; i
< n
; i
++)
2567 if (regclass
[i
] == X86_64_X87_CLASS
2568 || regclass
[i
] == X86_64_X87UP_CLASS
2569 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
2571 if (!issued_x87_ret_error
)
2573 error ("x87 register return with x87 disabled");
2574 issued_x87_ret_error
= true;
2579 /* First construct simple cases. Avoid SCmode, since we want to use
2580 single register to pass this type. */
2581 if (n
== 1 && mode
!= SCmode
)
2582 switch (regclass
[0])
2584 case X86_64_INTEGER_CLASS
:
2585 case X86_64_INTEGERSI_CLASS
:
2586 return gen_rtx_REG (mode
, intreg
[0]);
2587 case X86_64_SSE_CLASS
:
2588 case X86_64_SSESF_CLASS
:
2589 case X86_64_SSEDF_CLASS
:
2590 if (mode
!= BLKmode
)
2591 return gen_reg_or_parallel (mode
, orig_mode
,
2592 GET_SSE_REGNO (sse_regno
));
2594 case X86_64_X87_CLASS
:
2595 case X86_64_COMPLEX_X87_CLASS
:
2596 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
2597 case X86_64_NO_CLASS
:
2598 /* Zero sized array, struct or class. */
2604 && regclass
[0] == X86_64_SSE_CLASS
2605 && regclass
[1] == X86_64_SSEUP_CLASS
2607 return gen_reg_or_parallel (mode
, orig_mode
,
2608 GET_SSE_REGNO (sse_regno
));
2610 && regclass
[0] == X86_64_SSE_CLASS
2611 && regclass
[1] == X86_64_SSEUP_CLASS
2612 && regclass
[2] == X86_64_SSEUP_CLASS
2613 && regclass
[3] == X86_64_SSEUP_CLASS
2615 return gen_reg_or_parallel (mode
, orig_mode
,
2616 GET_SSE_REGNO (sse_regno
));
2618 && regclass
[0] == X86_64_SSE_CLASS
2619 && regclass
[1] == X86_64_SSEUP_CLASS
2620 && regclass
[2] == X86_64_SSEUP_CLASS
2621 && regclass
[3] == X86_64_SSEUP_CLASS
2622 && regclass
[4] == X86_64_SSEUP_CLASS
2623 && regclass
[5] == X86_64_SSEUP_CLASS
2624 && regclass
[6] == X86_64_SSEUP_CLASS
2625 && regclass
[7] == X86_64_SSEUP_CLASS
2627 return gen_reg_or_parallel (mode
, orig_mode
,
2628 GET_SSE_REGNO (sse_regno
));
2630 && regclass
[0] == X86_64_X87_CLASS
2631 && regclass
[1] == X86_64_X87UP_CLASS
)
2632 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
2635 && regclass
[0] == X86_64_INTEGER_CLASS
2636 && regclass
[1] == X86_64_INTEGER_CLASS
2637 && (mode
== CDImode
|| mode
== TImode
|| mode
== BLKmode
)
2638 && intreg
[0] + 1 == intreg
[1])
2640 if (mode
== BLKmode
)
2642 /* Use TImode for BLKmode values in 2 integer registers. */
2643 exp
[0] = gen_rtx_EXPR_LIST (VOIDmode
,
2644 gen_rtx_REG (TImode
, intreg
[0]),
2646 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (1));
2647 XVECEXP (ret
, 0, 0) = exp
[0];
2651 return gen_rtx_REG (mode
, intreg
[0]);
2654 /* Otherwise figure out the entries of the PARALLEL. */
2655 for (i
= 0; i
< n
; i
++)
2659 switch (regclass
[i
])
2661 case X86_64_NO_CLASS
:
2663 case X86_64_INTEGER_CLASS
:
2664 case X86_64_INTEGERSI_CLASS
:
2665 /* Merge TImodes on aligned occasions here too. */
2666 if (i
* 8 + 8 > bytes
)
2668 unsigned int tmpbits
= (bytes
- i
* 8) * BITS_PER_UNIT
;
2669 if (!int_mode_for_size (tmpbits
, 0).exists (&tmpmode
))
2670 /* We've requested 24 bytes we
2671 don't have mode for. Use DImode. */
2674 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
2679 = gen_rtx_EXPR_LIST (VOIDmode
,
2680 gen_rtx_REG (tmpmode
, *intreg
),
2684 case X86_64_SSESF_CLASS
:
2686 = gen_rtx_EXPR_LIST (VOIDmode
,
2687 gen_rtx_REG (SFmode
,
2688 GET_SSE_REGNO (sse_regno
)),
2692 case X86_64_SSEDF_CLASS
:
2694 = gen_rtx_EXPR_LIST (VOIDmode
,
2695 gen_rtx_REG (DFmode
,
2696 GET_SSE_REGNO (sse_regno
)),
2700 case X86_64_SSE_CLASS
:
2708 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
2718 && regclass
[1] == X86_64_SSEUP_CLASS
2719 && regclass
[2] == X86_64_SSEUP_CLASS
2720 && regclass
[3] == X86_64_SSEUP_CLASS
);
2726 && regclass
[1] == X86_64_SSEUP_CLASS
2727 && regclass
[2] == X86_64_SSEUP_CLASS
2728 && regclass
[3] == X86_64_SSEUP_CLASS
2729 && regclass
[4] == X86_64_SSEUP_CLASS
2730 && regclass
[5] == X86_64_SSEUP_CLASS
2731 && regclass
[6] == X86_64_SSEUP_CLASS
2732 && regclass
[7] == X86_64_SSEUP_CLASS
);
2740 = gen_rtx_EXPR_LIST (VOIDmode
,
2741 gen_rtx_REG (tmpmode
,
2742 GET_SSE_REGNO (sse_regno
)),
2751 /* Empty aligned struct, union or class. */
2755 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2756 for (i
= 0; i
< nexps
; i
++)
2757 XVECEXP (ret
, 0, i
) = exp
[i
];
2761 /* Update the data in CUM to advance over an argument of mode MODE
2762 and data type TYPE. (TYPE is null for libcalls where that information
2763 may not be available.)
2765 Return a number of integer regsiters advanced over. */
2768 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, machine_mode mode
,
2769 const_tree type
, HOST_WIDE_INT bytes
,
2770 HOST_WIDE_INT words
)
2773 bool error_p
= false;
2777 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2778 bytes in registers. */
2779 if (!VECTOR_MODE_P (mode
) && bytes
<= 8)
2799 cum
->words
+= words
;
2800 cum
->nregs
-= words
;
2801 cum
->regno
+= words
;
2802 if (cum
->nregs
>= 0)
2804 if (cum
->nregs
<= 0)
2807 cfun
->machine
->arg_reg_available
= false;
2813 /* OImode shouldn't be used directly. */
2817 if (cum
->float_in_sse
== -1)
2819 if (cum
->float_in_sse
< 2)
2823 if (cum
->float_in_sse
== -1)
2825 if (cum
->float_in_sse
< 1)
2848 if (!type
|| !AGGREGATE_TYPE_P (type
))
2850 cum
->sse_words
+= words
;
2851 cum
->sse_nregs
-= 1;
2852 cum
->sse_regno
+= 1;
2853 if (cum
->sse_nregs
<= 0)
2867 if (!type
|| !AGGREGATE_TYPE_P (type
))
2869 cum
->mmx_words
+= words
;
2870 cum
->mmx_nregs
-= 1;
2871 cum
->mmx_regno
+= 1;
2872 if (cum
->mmx_nregs
<= 0)
2882 cum
->float_in_sse
= 0;
2883 error ("calling %qD with SSE calling convention without "
2884 "SSE/SSE2 enabled", cum
->decl
);
2885 sorry ("this is a GCC bug that can be worked around by adding "
2886 "attribute used to function called");
2893 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, machine_mode mode
,
2894 const_tree type
, HOST_WIDE_INT words
, bool named
)
2896 int int_nregs
, sse_nregs
;
2898 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
2899 if (!named
&& (VALID_AVX512F_REG_MODE (mode
)
2900 || VALID_AVX256_REG_MODE (mode
)))
2903 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
2904 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2906 cum
->nregs
-= int_nregs
;
2907 cum
->sse_nregs
-= sse_nregs
;
2908 cum
->regno
+= int_nregs
;
2909 cum
->sse_regno
+= sse_nregs
;
2914 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
2915 cum
->words
= ROUND_UP (cum
->words
, align
);
2916 cum
->words
+= words
;
2922 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
2923 HOST_WIDE_INT words
)
2925 /* Otherwise, this should be passed indirect. */
2926 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
2928 cum
->words
+= words
;
2938 /* Update the data in CUM to advance over an argument of mode MODE and
2939 data type TYPE. (TYPE is null for libcalls where that information
2940 may not be available.) */
2943 ix86_function_arg_advance (cumulative_args_t cum_v
, machine_mode mode
,
2944 const_tree type
, bool named
)
2946 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
2947 HOST_WIDE_INT bytes
, words
;
2950 /* The argument of interrupt handler is a special case and is
2951 handled in ix86_function_arg. */
2952 if (!cum
->caller
&& cfun
->machine
->func_type
!= TYPE_NORMAL
)
2955 if (mode
== BLKmode
)
2956 bytes
= int_size_in_bytes (type
);
2958 bytes
= GET_MODE_SIZE (mode
);
2959 words
= CEIL (bytes
, UNITS_PER_WORD
);
2962 mode
= type_natural_mode (type
, NULL
, false);
2966 enum calling_abi call_abi
= cum
? cum
->call_abi
: ix86_abi
;
2968 if (call_abi
== MS_ABI
)
2969 nregs
= function_arg_advance_ms_64 (cum
, bytes
, words
);
2971 nregs
= function_arg_advance_64 (cum
, mode
, type
, words
, named
);
2974 nregs
= function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
2978 /* Track if there are outgoing arguments on stack. */
2980 cfun
->machine
->outgoing_args_on_stack
= true;
2984 /* Define where to put the arguments to a function.
2985 Value is zero to push the argument on the stack,
2986 or a hard register in which to store the argument.
2988 MODE is the argument's machine mode.
2989 TYPE is the data type of the argument (as a tree).
2990 This is null for libcalls where that information may
2992 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2993 the preceding args and about the function being called.
2994 NAMED is nonzero if this argument is a named parameter
2995 (otherwise it is an extra parameter matching an ellipsis). */
2998 function_arg_32 (CUMULATIVE_ARGS
*cum
, machine_mode mode
,
2999 machine_mode orig_mode
, const_tree type
,
3000 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
3002 bool error_p
= false;
3004 /* Avoid the AL settings for the Unix64 ABI. */
3005 if (mode
== VOIDmode
)
3010 /* Intel MCU psABI passes scalars and aggregates no larger than 8
3011 bytes in registers. */
3012 if (!VECTOR_MODE_P (mode
) && bytes
<= 8)
3031 if (words
<= cum
->nregs
)
3033 int regno
= cum
->regno
;
3035 /* Fastcall allocates the first two DWORD (SImode) or
3036 smaller arguments to ECX and EDX if it isn't an
3042 || (type
&& AGGREGATE_TYPE_P (type
)))
3045 /* ECX not EAX is the first allocated register. */
3046 if (regno
== AX_REG
)
3049 return gen_rtx_REG (mode
, regno
);
3054 if (cum
->float_in_sse
== -1)
3056 if (cum
->float_in_sse
< 2)
3060 if (cum
->float_in_sse
== -1)
3062 if (cum
->float_in_sse
< 1)
3066 /* In 32bit, we pass TImode in xmm registers. */
3073 if (!type
|| !AGGREGATE_TYPE_P (type
))
3076 return gen_reg_or_parallel (mode
, orig_mode
,
3077 cum
->sse_regno
+ FIRST_SSE_REG
);
3083 /* OImode and XImode shouldn't be used directly. */
3098 if (!type
|| !AGGREGATE_TYPE_P (type
))
3101 return gen_reg_or_parallel (mode
, orig_mode
,
3102 cum
->sse_regno
+ FIRST_SSE_REG
);
3112 if (!type
|| !AGGREGATE_TYPE_P (type
))
3115 return gen_reg_or_parallel (mode
, orig_mode
,
3116 cum
->mmx_regno
+ FIRST_MMX_REG
);
3122 cum
->float_in_sse
= 0;
3123 error ("calling %qD with SSE calling convention without "
3124 "SSE/SSE2 enabled", cum
->decl
);
3125 sorry ("this is a GCC bug that can be worked around by adding "
3126 "attribute used to function called");
3133 function_arg_64 (const CUMULATIVE_ARGS
*cum
, machine_mode mode
,
3134 machine_mode orig_mode
, const_tree type
, bool named
)
3136 /* Handle a hidden AL argument containing number of registers
3137 for varargs x86-64 functions. */
3138 if (mode
== VOIDmode
)
3139 return GEN_INT (cum
->maybe_vaarg
3140 ? (cum
->sse_nregs
< 0
3141 ? X86_64_SSE_REGPARM_MAX
3162 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
3168 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
3170 &x86_64_int_parameter_registers
[cum
->regno
],
3175 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, machine_mode mode
,
3176 machine_mode orig_mode
, bool named
,
3177 HOST_WIDE_INT bytes
)
3181 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
3182 We use value of -2 to specify that current function call is MSABI. */
3183 if (mode
== VOIDmode
)
3184 return GEN_INT (-2);
3186 /* If we've run out of registers, it goes on the stack. */
3187 if (cum
->nregs
== 0)
3190 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
3192 /* Only floating point modes are passed in anything but integer regs. */
3193 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
3196 regno
= cum
->regno
+ FIRST_SSE_REG
;
3201 /* Unnamed floating parameters are passed in both the
3202 SSE and integer registers. */
3203 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
3204 t2
= gen_rtx_REG (mode
, regno
);
3205 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
3206 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
3207 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
3210 /* Handle aggregated types passed in register. */
3211 if (orig_mode
== BLKmode
)
3213 if (bytes
> 0 && bytes
<= 8)
3214 mode
= (bytes
> 4 ? DImode
: SImode
);
3215 if (mode
== BLKmode
)
3219 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
3222 /* Return where to put the arguments to a function.
3223 Return zero to push the argument on the stack, or a hard register in which to store the argument.
3225 MODE is the argument's machine mode. TYPE is the data type of the
3226 argument. It is null for libcalls where that information may not be
3227 available. CUM gives information about the preceding args and about
3228 the function being called. NAMED is nonzero if this argument is a
3229 named parameter (otherwise it is an extra parameter matching an
3233 ix86_function_arg (cumulative_args_t cum_v
, machine_mode omode
,
3234 const_tree type
, bool named
)
3236 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3237 machine_mode mode
= omode
;
3238 HOST_WIDE_INT bytes
, words
;
3241 if (!cum
->caller
&& cfun
->machine
->func_type
!= TYPE_NORMAL
)
3243 gcc_assert (type
!= NULL_TREE
);
3244 if (POINTER_TYPE_P (type
))
3246 /* This is the pointer argument. */
3247 gcc_assert (TYPE_MODE (type
) == Pmode
);
3248 /* It is at -WORD(AP) in the current frame in interrupt and
3249 exception handlers. */
3250 arg
= plus_constant (Pmode
, arg_pointer_rtx
, -UNITS_PER_WORD
);
3254 gcc_assert (cfun
->machine
->func_type
== TYPE_EXCEPTION
3255 && TREE_CODE (type
) == INTEGER_TYPE
3256 && TYPE_MODE (type
) == word_mode
);
3257 /* The error code is the word-mode integer argument at
3258 -2 * WORD(AP) in the current frame of the exception
3260 arg
= gen_rtx_MEM (word_mode
,
3261 plus_constant (Pmode
,
3263 -2 * UNITS_PER_WORD
));
3268 if (mode
== BLKmode
)
3269 bytes
= int_size_in_bytes (type
);
3271 bytes
= GET_MODE_SIZE (mode
);
3272 words
= CEIL (bytes
, UNITS_PER_WORD
);
3274 /* To simplify the code below, represent vector types with a vector mode
3275 even if MMX/SSE are not active. */
3276 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
3277 mode
= type_natural_mode (type
, cum
, false);
3281 enum calling_abi call_abi
= cum
? cum
->call_abi
: ix86_abi
;
3283 if (call_abi
== MS_ABI
)
3284 arg
= function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
3286 arg
= function_arg_64 (cum
, mode
, omode
, type
, named
);
3289 arg
= function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
3291 /* Track if there are outgoing arguments on stack. */
3292 if (arg
== NULL_RTX
&& cum
->caller
)
3293 cfun
->machine
->outgoing_args_on_stack
= true;
3298 /* A C expression that indicates when an argument must be passed by
3299 reference. If nonzero for an argument, a copy of that argument is
3300 made in memory and a pointer to the argument is passed instead of
3301 the argument itself. The pointer is passed in whatever way is
3302 appropriate for passing a pointer to that type. */
3305 ix86_pass_by_reference (cumulative_args_t cum_v
, machine_mode mode
,
3306 const_tree type
, bool)
3308 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3312 enum calling_abi call_abi
= cum
? cum
->call_abi
: ix86_abi
;
3314 /* See Windows x64 Software Convention. */
3315 if (call_abi
== MS_ABI
)
3317 HOST_WIDE_INT msize
= GET_MODE_SIZE (mode
);
3321 /* Arrays are passed by reference. */
3322 if (TREE_CODE (type
) == ARRAY_TYPE
)
3325 if (RECORD_OR_UNION_TYPE_P (type
))
3327 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3328 are passed by reference. */
3329 msize
= int_size_in_bytes (type
);
3333 /* __m128 is passed by reference. */
3334 return msize
!= 1 && msize
!= 2 && msize
!= 4 && msize
!= 8;
3336 else if (type
&& int_size_in_bytes (type
) == -1)
3343 /* Return true when TYPE should be 128bit aligned for 32bit argument
3344 passing ABI. XXX: This function is obsolete and is only used for
3345 checking psABI compatibility with previous versions of GCC. */
3348 ix86_compat_aligned_value_p (const_tree type
)
3350 machine_mode mode
= TYPE_MODE (type
);
3351 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
3355 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
3357 if (TYPE_ALIGN (type
) < 128)
3360 if (AGGREGATE_TYPE_P (type
))
3362 /* Walk the aggregates recursively. */
3363 switch (TREE_CODE (type
))
3367 case QUAL_UNION_TYPE
:
3371 /* Walk all the structure fields. */
3372 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
3374 if (TREE_CODE (field
) == FIELD_DECL
3375 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
3382 /* Just for use if some languages passes arrays by value. */
3383 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
3394 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3395 XXX: This function is obsolete and is only used for checking psABI
3396 compatibility with previous versions of GCC. */
3399 ix86_compat_function_arg_boundary (machine_mode mode
,
3400 const_tree type
, unsigned int align
)
3402 /* In 32bit, only _Decimal128 and __float128 are aligned to their
3403 natural boundaries. */
3404 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
3406 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3407 make an exception for SSE modes since these require 128bit
3410 The handling here differs from field_alignment. ICC aligns MMX
3411 arguments to 4 byte boundaries, while structure fields are aligned
3412 to 8 byte boundaries. */
3415 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
3416 align
= PARM_BOUNDARY
;
3420 if (!ix86_compat_aligned_value_p (type
))
3421 align
= PARM_BOUNDARY
;
3424 if (align
> BIGGEST_ALIGNMENT
)
3425 align
= BIGGEST_ALIGNMENT
;
3429 /* Return true when TYPE should be 128bit aligned for 32bit argument
3433 ix86_contains_aligned_value_p (const_tree type
)
3435 machine_mode mode
= TYPE_MODE (type
);
3437 if (mode
== XFmode
|| mode
== XCmode
)
3440 if (TYPE_ALIGN (type
) < 128)
3443 if (AGGREGATE_TYPE_P (type
))
3445 /* Walk the aggregates recursively. */
3446 switch (TREE_CODE (type
))
3450 case QUAL_UNION_TYPE
:
3454 /* Walk all the structure fields. */
3455 for (field
= TYPE_FIELDS (type
);
3457 field
= DECL_CHAIN (field
))
3459 if (TREE_CODE (field
) == FIELD_DECL
3460 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
3467 /* Just for use if some languages passes arrays by value. */
3468 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
3477 return TYPE_ALIGN (type
) >= 128;
3482 /* Gives the alignment boundary, in bits, of an argument with the
3483 specified mode and type. */
3486 ix86_function_arg_boundary (machine_mode mode
, const_tree type
)
3491 /* Since the main variant type is used for call, we convert it to
3492 the main variant type. */
3493 type
= TYPE_MAIN_VARIANT (type
);
3494 align
= TYPE_ALIGN (type
);
3495 if (TYPE_EMPTY_P (type
))
3496 return PARM_BOUNDARY
;
3499 align
= GET_MODE_ALIGNMENT (mode
);
3500 if (align
< PARM_BOUNDARY
)
3501 align
= PARM_BOUNDARY
;
3505 unsigned int saved_align
= align
;
3509 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
3512 if (mode
== XFmode
|| mode
== XCmode
)
3513 align
= PARM_BOUNDARY
;
3515 else if (!ix86_contains_aligned_value_p (type
))
3516 align
= PARM_BOUNDARY
;
3519 align
= PARM_BOUNDARY
;
3524 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
3528 inform (input_location
,
3529 "the ABI for passing parameters with %d-byte"
3530 " alignment has changed in GCC 4.6",
3531 align
/ BITS_PER_UNIT
);
3538 /* Return true if N is a possible register number of function value. */
3541 ix86_function_value_regno_p (const unsigned int regno
)
3548 return (!TARGET_64BIT
|| ix86_cfun_abi () != MS_ABI
);
3551 return TARGET_64BIT
&& ix86_cfun_abi () != MS_ABI
;
3553 /* Complex values are returned in %st(0)/%st(1) pair. */
3556 /* TODO: The function should depend on current function ABI but
3557 builtins.c would need updating then. Therefore we use the
3559 if (TARGET_64BIT
&& ix86_cfun_abi () == MS_ABI
)
3561 return TARGET_FLOAT_RETURNS_IN_80387
;
3563 /* Complex values are returned in %xmm0/%xmm1 pair. */
3569 if (TARGET_MACHO
|| TARGET_64BIT
)
3577 /* Define how to find the value returned by a function.
3578 VALTYPE is the data type of the value (as a tree).
3579 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3580 otherwise, FUNC is 0. */
3583 function_value_32 (machine_mode orig_mode
, machine_mode mode
,
3584 const_tree fntype
, const_tree fn
)
3588 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3589 we normally prevent this case when mmx is not available. However
3590 some ABIs may require the result to be returned like DImode. */
3591 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
3592 regno
= FIRST_MMX_REG
;
3594 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3595 we prevent this case when sse is not available. However some ABIs
3596 may require the result to be returned like integer TImode. */
3597 else if (mode
== TImode
3598 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
3599 regno
= FIRST_SSE_REG
;
3601 /* 32-byte vector modes in %ymm0. */
3602 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
3603 regno
= FIRST_SSE_REG
;
3605 /* 64-byte vector modes in %zmm0. */
3606 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 64)
3607 regno
= FIRST_SSE_REG
;
3609 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
3610 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
3611 regno
= FIRST_FLOAT_REG
;
3613 /* Most things go in %eax. */
3616 /* Override FP return register with %xmm0 for local functions when
3617 SSE math is enabled or for functions with sseregparm attribute. */
3618 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
3620 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
3621 if (sse_level
== -1)
3623 error ("calling %qD with SSE calling convention without "
3624 "SSE/SSE2 enabled", fn
);
3625 sorry ("this is a GCC bug that can be worked around by adding "
3626 "attribute used to function called");
3628 else if ((sse_level
>= 1 && mode
== SFmode
)
3629 || (sse_level
== 2 && mode
== DFmode
))
3630 regno
= FIRST_SSE_REG
;
3633 /* OImode shouldn't be used directly. */
3634 gcc_assert (mode
!= OImode
);
3636 return gen_rtx_REG (orig_mode
, regno
);
3640 function_value_64 (machine_mode orig_mode
, machine_mode mode
,
3645 /* Handle libcalls, which don't provide a type node. */
3646 if (valtype
== NULL
)
3660 regno
= FIRST_SSE_REG
;
3664 regno
= FIRST_FLOAT_REG
;
3672 return gen_rtx_REG (mode
, regno
);
3674 else if (POINTER_TYPE_P (valtype
))
3676 /* Pointers are always returned in word_mode. */
3680 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
3681 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
3682 x86_64_int_return_registers
, 0);
3684 /* For zero sized structures, construct_container returns NULL, but we
3685 need to keep rest of compiler happy by returning meaningful value. */
3687 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
3693 function_value_ms_32 (machine_mode orig_mode
, machine_mode mode
,
3694 const_tree fntype
, const_tree fn
, const_tree valtype
)
3698 /* Floating point return values in %st(0)
3699 (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
3700 if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
3701 && (GET_MODE_SIZE (mode
) > 8
3702 || valtype
== NULL_TREE
|| !AGGREGATE_TYPE_P (valtype
)))
3704 regno
= FIRST_FLOAT_REG
;
3705 return gen_rtx_REG (orig_mode
, regno
);
3708 return function_value_32(orig_mode
, mode
, fntype
,fn
);
3712 function_value_ms_64 (machine_mode orig_mode
, machine_mode mode
,
3715 unsigned int regno
= AX_REG
;
3719 switch (GET_MODE_SIZE (mode
))
3722 if (valtype
!= NULL_TREE
3723 && !VECTOR_INTEGER_TYPE_P (valtype
)
3724 && !VECTOR_INTEGER_TYPE_P (valtype
)
3725 && !INTEGRAL_TYPE_P (valtype
)
3726 && !VECTOR_FLOAT_TYPE_P (valtype
))
3728 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
3729 && !COMPLEX_MODE_P (mode
))
3730 regno
= FIRST_SSE_REG
;
3734 if (valtype
!= NULL_TREE
&& AGGREGATE_TYPE_P (valtype
))
3736 if (mode
== SFmode
|| mode
== DFmode
)
3737 regno
= FIRST_SSE_REG
;
3743 return gen_rtx_REG (orig_mode
, regno
);
3747 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
3748 machine_mode orig_mode
, machine_mode mode
)
3750 const_tree fn
, fntype
;
3753 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
3754 fn
= fntype_or_decl
;
3755 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
3757 if (ix86_function_type_abi (fntype
) == MS_ABI
)
3760 return function_value_ms_64 (orig_mode
, mode
, valtype
);
3762 return function_value_ms_32 (orig_mode
, mode
, fntype
, fn
, valtype
);
3764 else if (TARGET_64BIT
)
3765 return function_value_64 (orig_mode
, mode
, valtype
);
3767 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
3771 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
, bool)
3773 machine_mode mode
, orig_mode
;
3775 orig_mode
= TYPE_MODE (valtype
);
3776 mode
= type_natural_mode (valtype
, NULL
, true);
3777 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
3780 /* Pointer function arguments and return values are promoted to
3781 word_mode for normal functions. */
3784 ix86_promote_function_mode (const_tree type
, machine_mode mode
,
3785 int *punsignedp
, const_tree fntype
,
3788 if (cfun
->machine
->func_type
== TYPE_NORMAL
3789 && type
!= NULL_TREE
3790 && POINTER_TYPE_P (type
))
3792 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
3795 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
3799 /* Return true if a structure, union or array with MODE containing FIELD
3800 should be accessed using BLKmode. */
3803 ix86_member_type_forces_blk (const_tree field
, machine_mode mode
)
3805 /* Union with XFmode must be in BLKmode. */
3806 return (mode
== XFmode
3807 && (TREE_CODE (DECL_FIELD_CONTEXT (field
)) == UNION_TYPE
3808 || TREE_CODE (DECL_FIELD_CONTEXT (field
)) == QUAL_UNION_TYPE
));
3812 ix86_libcall_value (machine_mode mode
)
3814 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
3817 /* Return true iff type is returned in memory. */
3820 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
3822 #ifdef SUBTARGET_RETURN_IN_MEMORY
3823 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
3825 const machine_mode mode
= type_natural_mode (type
, NULL
, true);
3830 if (ix86_function_type_abi (fntype
) == MS_ABI
)
3832 size
= int_size_in_bytes (type
);
3834 /* __m128 is returned in xmm0. */
3835 if ((!type
|| VECTOR_INTEGER_TYPE_P (type
)
3836 || INTEGRAL_TYPE_P (type
)
3837 || VECTOR_FLOAT_TYPE_P (type
))
3838 && (SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
3839 && !COMPLEX_MODE_P (mode
)
3840 && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
3843 /* Otherwise, the size must be exactly in [1248]. */
3844 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
3848 int needed_intregs
, needed_sseregs
;
3850 return examine_argument (mode
, type
, 1,
3851 &needed_intregs
, &needed_sseregs
);
3856 size
= int_size_in_bytes (type
);
3858 /* Intel MCU psABI returns scalars and aggregates no larger than 8
3859 bytes in registers. */
3861 return VECTOR_MODE_P (mode
) || size
< 0 || size
> 8;
3863 if (mode
== BLKmode
)
3866 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
3869 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
3871 /* User-created vectors small enough to fit in EAX. */
3875 /* Unless ABI prescibes otherwise,
3876 MMX/3dNow values are returned in MM0 if available. */
3879 return TARGET_VECT8_RETURNS
|| !TARGET_MMX
;
3881 /* SSE values are returned in XMM0 if available. */
3885 /* AVX values are returned in YMM0 if available. */
3889 /* AVX512F values are returned in ZMM0 if available. */
3891 return !TARGET_AVX512F
;
3900 /* OImode shouldn't be used directly. */
3901 gcc_assert (mode
!= OImode
);
3909 /* Create the va_list data type. */
3912 ix86_build_builtin_va_list_64 (void)
3914 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
3916 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
3917 type_decl
= build_decl (BUILTINS_LOCATION
,
3918 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
3920 f_gpr
= build_decl (BUILTINS_LOCATION
,
3921 FIELD_DECL
, get_identifier ("gp_offset"),
3922 unsigned_type_node
);
3923 f_fpr
= build_decl (BUILTINS_LOCATION
,
3924 FIELD_DECL
, get_identifier ("fp_offset"),
3925 unsigned_type_node
);
3926 f_ovf
= build_decl (BUILTINS_LOCATION
,
3927 FIELD_DECL
, get_identifier ("overflow_arg_area"),
3929 f_sav
= build_decl (BUILTINS_LOCATION
,
3930 FIELD_DECL
, get_identifier ("reg_save_area"),
3933 va_list_gpr_counter_field
= f_gpr
;
3934 va_list_fpr_counter_field
= f_fpr
;
3936 DECL_FIELD_CONTEXT (f_gpr
) = record
;
3937 DECL_FIELD_CONTEXT (f_fpr
) = record
;
3938 DECL_FIELD_CONTEXT (f_ovf
) = record
;
3939 DECL_FIELD_CONTEXT (f_sav
) = record
;
3941 TYPE_STUB_DECL (record
) = type_decl
;
3942 TYPE_NAME (record
) = type_decl
;
3943 TYPE_FIELDS (record
) = f_gpr
;
3944 DECL_CHAIN (f_gpr
) = f_fpr
;
3945 DECL_CHAIN (f_fpr
) = f_ovf
;
3946 DECL_CHAIN (f_ovf
) = f_sav
;
3948 layout_type (record
);
3950 TYPE_ATTRIBUTES (record
) = tree_cons (get_identifier ("sysv_abi va_list"),
3951 NULL_TREE
, TYPE_ATTRIBUTES (record
));
3953 /* The correct type is an array type of one element. */
3954 return build_array_type (record
, build_index_type (size_zero_node
));
3957 /* Setup the builtin va_list data type and for 64-bit the additional
3958 calling convention specific va_list data types. */
3961 ix86_build_builtin_va_list (void)
3965 /* Initialize ABI specific va_list builtin types.
3967 In lto1, we can encounter two va_list types:
3968 - one as a result of the type-merge across TUs, and
3969 - the one constructed here.
3970 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
3971 a type identity check in canonical_va_list_type based on
3972 TYPE_MAIN_VARIANT (which we used to have) will not work.
3973 Instead, we tag each va_list_type_node with its unique attribute, and
3974 look for the attribute in the type identity check in
3975 canonical_va_list_type.
3977 Tagging sysv_va_list_type_node directly with the attribute is
3978 problematic since it's a array of one record, which will degrade into a
3979 pointer to record when used as parameter (see build_va_arg comments for
3980 an example), dropping the attribute in the process. So we tag the
3983 /* For SYSV_ABI we use an array of one record. */
3984 sysv_va_list_type_node
= ix86_build_builtin_va_list_64 ();
3986 /* For MS_ABI we use plain pointer to argument area. */
3987 tree char_ptr_type
= build_pointer_type (char_type_node
);
3988 tree attr
= tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE
,
3989 TYPE_ATTRIBUTES (char_ptr_type
));
3990 ms_va_list_type_node
= build_type_attribute_variant (char_ptr_type
, attr
);
3992 return ((ix86_abi
== MS_ABI
)
3993 ? ms_va_list_type_node
3994 : sysv_va_list_type_node
);
3998 /* For i386 we use plain pointer to argument area. */
3999 return build_pointer_type (char_type_node
);
4003 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4006 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
4012 /* GPR size of varargs save area. */
4013 if (cfun
->va_list_gpr_size
)
4014 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
4016 ix86_varargs_gpr_size
= 0;
4018 /* FPR size of varargs save area. We don't need it if we don't pass
4019 anything in SSE registers. */
4020 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
4021 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
4023 ix86_varargs_fpr_size
= 0;
4025 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
4028 save_area
= frame_pointer_rtx
;
4029 set
= get_varargs_alias_set ();
4031 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4032 if (max
> X86_64_REGPARM_MAX
)
4033 max
= X86_64_REGPARM_MAX
;
4035 for (i
= cum
->regno
; i
< max
; i
++)
4037 mem
= gen_rtx_MEM (word_mode
,
4038 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
4039 MEM_NOTRAP_P (mem
) = 1;
4040 set_mem_alias_set (mem
, set
);
4041 emit_move_insn (mem
,
4042 gen_rtx_REG (word_mode
,
4043 x86_64_int_parameter_registers
[i
]));
4046 if (ix86_varargs_fpr_size
)
4049 rtx_code_label
*label
;
4052 /* Now emit code to save SSE registers. The AX parameter contains number
4053 of SSE parameter registers used to call this function, though all we
4054 actually check here is the zero/non-zero status. */
4056 label
= gen_label_rtx ();
4057 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
4058 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
4061 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4062 we used movdqa (i.e. TImode) instead? Perhaps even better would
4063 be if we could determine the real mode of the data, via a hook
4064 into pass_stdarg. Ignore all that for now. */
4066 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
4067 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
4069 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
4070 if (max
> X86_64_SSE_REGPARM_MAX
)
4071 max
= X86_64_SSE_REGPARM_MAX
;
4073 for (i
= cum
->sse_regno
; i
< max
; ++i
)
4075 mem
= plus_constant (Pmode
, save_area
,
4076 i
* 16 + ix86_varargs_gpr_size
);
4077 mem
= gen_rtx_MEM (smode
, mem
);
4078 MEM_NOTRAP_P (mem
) = 1;
4079 set_mem_alias_set (mem
, set
);
4080 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
4082 emit_move_insn (mem
, gen_rtx_REG (smode
, GET_SSE_REGNO (i
)));
4090 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
4092 alias_set_type set
= get_varargs_alias_set ();
4095 /* Reset to zero, as there might be a sysv vaarg used
4097 ix86_varargs_gpr_size
= 0;
4098 ix86_varargs_fpr_size
= 0;
4100 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
4104 mem
= gen_rtx_MEM (Pmode
,
4105 plus_constant (Pmode
, virtual_incoming_args_rtx
,
4106 i
* UNITS_PER_WORD
));
4107 MEM_NOTRAP_P (mem
) = 1;
4108 set_mem_alias_set (mem
, set
);
4110 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
4111 emit_move_insn (mem
, reg
);
4116 ix86_setup_incoming_varargs (cumulative_args_t cum_v
, machine_mode mode
,
4117 tree type
, int *, int no_rtl
)
4119 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
4120 CUMULATIVE_ARGS next_cum
;
4123 /* This argument doesn't appear to be used anymore. Which is good,
4124 because the old code here didn't suppress rtl generation. */
4125 gcc_assert (!no_rtl
);
4130 fntype
= TREE_TYPE (current_function_decl
);
4132 /* For varargs, we do not want to skip the dummy va_dcl argument.
4133 For stdargs, we do want to skip the last named argument. */
4135 if (stdarg_p (fntype
))
4136 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
4139 if (cum
->call_abi
== MS_ABI
)
4140 setup_incoming_varargs_ms_64 (&next_cum
);
4142 setup_incoming_varargs_64 (&next_cum
);
4146 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v
,
4149 int *pretend_size ATTRIBUTE_UNUSED
,
4152 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
4153 CUMULATIVE_ARGS next_cum
;
4157 gcc_assert (!no_rtl
);
4159 /* Do nothing if we use plain pointer to argument area. */
4160 if (!TARGET_64BIT
|| cum
->call_abi
== MS_ABI
)
4163 fntype
= TREE_TYPE (current_function_decl
);
4165 /* For varargs, we do not want to skip the dummy va_dcl argument.
4166 For stdargs, we do want to skip the last named argument. */
4168 if (stdarg_p (fntype
))
4169 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
4172 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4173 if (max
> X86_64_REGPARM_MAX
)
4174 max
= X86_64_REGPARM_MAX
;
4178 /* Checks if TYPE is of kind va_list char *. */
4181 is_va_list_char_pointer (tree type
)
4185 /* For 32-bit it is always true. */
4188 canonic
= ix86_canonical_va_list_type (type
);
4189 return (canonic
== ms_va_list_type_node
4190 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
4193 /* Implement va_start. */
4196 ix86_va_start (tree valist
, rtx nextarg
)
4198 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4199 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4200 tree gpr
, fpr
, ovf
, sav
, t
;
4204 if (flag_split_stack
4205 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
4207 unsigned int scratch_regno
;
4209 /* When we are splitting the stack, we can't refer to the stack
4210 arguments using internal_arg_pointer, because they may be on
4211 the old stack. The split stack prologue will arrange to
4212 leave a pointer to the old stack arguments in a scratch
4213 register, which we here copy to a pseudo-register. The split
4214 stack prologue can't set the pseudo-register directly because
4215 it (the prologue) runs before any registers have been saved. */
4217 scratch_regno
= split_stack_prologue_scratch_regno ();
4218 if (scratch_regno
!= INVALID_REGNUM
)
4223 reg
= gen_reg_rtx (Pmode
);
4224 cfun
->machine
->split_stack_varargs_pointer
= reg
;
4227 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
4231 push_topmost_sequence ();
4232 emit_insn_after (seq
, entry_of_function ());
4233 pop_topmost_sequence ();
4237 /* Only 64bit target needs something special. */
4238 if (is_va_list_char_pointer (TREE_TYPE (valist
)))
4240 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
4241 std_expand_builtin_va_start (valist
, nextarg
);
4246 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
4247 next
= expand_binop (ptr_mode
, add_optab
,
4248 cfun
->machine
->split_stack_varargs_pointer
,
4249 crtl
->args
.arg_offset_rtx
,
4250 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
4251 convert_move (va_r
, next
, 0);
4256 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
4257 f_fpr
= DECL_CHAIN (f_gpr
);
4258 f_ovf
= DECL_CHAIN (f_fpr
);
4259 f_sav
= DECL_CHAIN (f_ovf
);
4261 valist
= build_simple_mem_ref (valist
);
4262 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
4263 /* The following should be folded into the MEM_REF offset. */
4264 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
4266 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
4268 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
4270 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
4273 /* Count number of gp and fp argument registers used. */
4274 words
= crtl
->args
.info
.words
;
4275 n_gpr
= crtl
->args
.info
.regno
;
4276 n_fpr
= crtl
->args
.info
.sse_regno
;
4278 if (cfun
->va_list_gpr_size
)
4280 type
= TREE_TYPE (gpr
);
4281 t
= build2 (MODIFY_EXPR
, type
,
4282 gpr
, build_int_cst (type
, n_gpr
* 8));
4283 TREE_SIDE_EFFECTS (t
) = 1;
4284 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4287 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
4289 type
= TREE_TYPE (fpr
);
4290 t
= build2 (MODIFY_EXPR
, type
, fpr
,
4291 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
4292 TREE_SIDE_EFFECTS (t
) = 1;
4293 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4296 /* Find the overflow area. */
4297 type
= TREE_TYPE (ovf
);
4298 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
4299 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
4301 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
4302 t
= make_tree (type
, ovf_rtx
);
4304 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
4306 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
4307 TREE_SIDE_EFFECTS (t
) = 1;
4308 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4310 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
4312 /* Find the register save area.
4313 Prologue of the function save it right above stack frame. */
4314 type
= TREE_TYPE (sav
);
4315 t
= make_tree (type
, frame_pointer_rtx
);
4316 if (!ix86_varargs_gpr_size
)
4317 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
4319 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
4320 TREE_SIDE_EFFECTS (t
) = 1;
4321 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4325 /* Implement va_arg. */
4328 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
4331 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4332 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4333 tree gpr
, fpr
, ovf
, sav
, t
;
4335 tree lab_false
, lab_over
= NULL_TREE
;
4340 machine_mode nat_mode
;
4341 unsigned int arg_boundary
;
4343 /* Only 64bit target needs something special. */
4344 if (is_va_list_char_pointer (TREE_TYPE (valist
)))
4345 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4347 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
4348 f_fpr
= DECL_CHAIN (f_gpr
);
4349 f_ovf
= DECL_CHAIN (f_fpr
);
4350 f_sav
= DECL_CHAIN (f_ovf
);
4352 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
4353 valist
, f_gpr
, NULL_TREE
);
4355 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4356 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4357 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4359 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4361 type
= build_pointer_type (type
);
4362 size
= arg_int_size_in_bytes (type
);
4363 rsize
= CEIL (size
, UNITS_PER_WORD
);
4365 nat_mode
= type_natural_mode (type
, NULL
, false);
4380 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
4381 if (!TARGET_64BIT_MS_ABI
)
4389 container
= construct_container (nat_mode
, TYPE_MODE (type
),
4390 type
, 0, X86_64_REGPARM_MAX
,
4391 X86_64_SSE_REGPARM_MAX
, intreg
,
4396 /* Pull the value out of the saved registers. */
4398 addr
= create_tmp_var (ptr_type_node
, "addr");
4402 int needed_intregs
, needed_sseregs
;
4404 tree int_addr
, sse_addr
;
4406 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
4407 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
4409 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4411 need_temp
= (!REG_P (container
)
4412 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4413 || TYPE_ALIGN (type
) > 128));
4415 /* In case we are passing structure, verify that it is consecutive block
4416 on the register save area. If not we need to do moves. */
4417 if (!need_temp
&& !REG_P (container
))
4419 /* Verify that all registers are strictly consecutive */
4420 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4424 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4426 rtx slot
= XVECEXP (container
, 0, i
);
4427 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4428 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4436 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4438 rtx slot
= XVECEXP (container
, 0, i
);
4439 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4440 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4452 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4453 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4456 /* First ensure that we fit completely in registers. */
4459 t
= build_int_cst (TREE_TYPE (gpr
),
4460 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
4461 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4462 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4463 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4464 gimplify_and_add (t
, pre_p
);
4468 t
= build_int_cst (TREE_TYPE (fpr
),
4469 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4470 + X86_64_REGPARM_MAX
* 8);
4471 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4472 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4473 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4474 gimplify_and_add (t
, pre_p
);
4477 /* Compute index to start of area used for integer regs. */
4480 /* int_addr = gpr + sav; */
4481 t
= fold_build_pointer_plus (sav
, gpr
);
4482 gimplify_assign (int_addr
, t
, pre_p
);
4486 /* sse_addr = fpr + sav; */
4487 t
= fold_build_pointer_plus (sav
, fpr
);
4488 gimplify_assign (sse_addr
, t
, pre_p
);
4492 int i
, prev_size
= 0;
4493 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4496 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4497 gimplify_assign (addr
, t
, pre_p
);
4499 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4501 rtx slot
= XVECEXP (container
, 0, i
);
4502 rtx reg
= XEXP (slot
, 0);
4503 machine_mode mode
= GET_MODE (reg
);
4509 tree dest_addr
, dest
;
4510 int cur_size
= GET_MODE_SIZE (mode
);
4512 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
4513 prev_size
= INTVAL (XEXP (slot
, 1));
4514 if (prev_size
+ cur_size
> size
)
4516 cur_size
= size
- prev_size
;
4517 unsigned int nbits
= cur_size
* BITS_PER_UNIT
;
4518 if (!int_mode_for_size (nbits
, 1).exists (&mode
))
4521 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4522 if (mode
== GET_MODE (reg
))
4523 addr_type
= build_pointer_type (piece_type
);
4525 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
4527 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
4530 if (SSE_REGNO_P (REGNO (reg
)))
4532 src_addr
= sse_addr
;
4533 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4537 src_addr
= int_addr
;
4538 src_offset
= REGNO (reg
) * 8;
4540 src_addr
= fold_convert (addr_type
, src_addr
);
4541 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
4543 dest_addr
= fold_convert (daddr_type
, addr
);
4544 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
4545 if (cur_size
== GET_MODE_SIZE (mode
))
4547 src
= build_va_arg_indirect_ref (src_addr
);
4548 dest
= build_va_arg_indirect_ref (dest_addr
);
4550 gimplify_assign (dest
, src
, pre_p
);
4555 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
4556 3, dest_addr
, src_addr
,
4557 size_int (cur_size
));
4558 gimplify_and_add (copy
, pre_p
);
4560 prev_size
+= cur_size
;
4566 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4567 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4568 gimplify_assign (gpr
, t
, pre_p
);
4573 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4574 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4575 gimplify_assign (unshare_expr (fpr
), t
, pre_p
);
4578 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
4580 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
4583 /* ... otherwise out of the overflow area. */
4585 /* When we align parameter on stack for caller, if the parameter
4586 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
4587 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
4588 here with caller. */
4589 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
4590 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
4591 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
4593 /* Care for on-stack alignment if needed. */
4594 if (arg_boundary
<= 64 || size
== 0)
4598 HOST_WIDE_INT align
= arg_boundary
/ 8;
4599 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
4600 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4601 build_int_cst (TREE_TYPE (t
), -align
));
4604 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4605 gimplify_assign (addr
, t
, pre_p
);
4607 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
4608 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
4611 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
4613 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
4614 addr
= fold_convert (ptrtype
, addr
);
4617 addr
= build_va_arg_indirect_ref (addr
);
4618 return build_va_arg_indirect_ref (addr
);
4621 /* Return true if OPNUM's MEM should be matched
4622 in movabs* patterns. */
4625 ix86_check_movabs (rtx insn
, int opnum
)
4629 set
= PATTERN (insn
);
4630 if (GET_CODE (set
) == PARALLEL
)
4631 set
= XVECEXP (set
, 0, 0);
4632 gcc_assert (GET_CODE (set
) == SET
);
4633 mem
= XEXP (set
, opnum
);
4634 while (SUBREG_P (mem
))
4635 mem
= SUBREG_REG (mem
);
4636 gcc_assert (MEM_P (mem
));
4637 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
4640 /* Return false if INSN contains a MEM with a non-default address space. */
4642 ix86_check_no_addr_space (rtx insn
)
4644 subrtx_var_iterator::array_type array
;
4645 FOR_EACH_SUBRTX_VAR (iter
, array
, PATTERN (insn
), ALL
)
4648 if (MEM_P (x
) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x
)))
4654 /* Initialize the table of extra 80387 mathematical constants. */
4657 init_ext_80387_constants (void)
4659 static const char * cst
[5] =
4661 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4662 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4663 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4664 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4665 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4669 for (i
= 0; i
< 5; i
++)
4671 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
4672 /* Ensure each constant is rounded to XFmode precision. */
4673 real_convert (&ext_80387_constants_table
[i
],
4674 XFmode
, &ext_80387_constants_table
[i
]);
4677 ext_80387_constants_init
= 1;
4680 /* Return non-zero if the constant is something that
4681 can be loaded with a special instruction. */
4684 standard_80387_constant_p (rtx x
)
4686 machine_mode mode
= GET_MODE (x
);
4688 const REAL_VALUE_TYPE
*r
;
4690 if (!(CONST_DOUBLE_P (x
) && X87_FLOAT_MODE_P (mode
)))
4693 if (x
== CONST0_RTX (mode
))
4695 if (x
== CONST1_RTX (mode
))
4698 r
= CONST_DOUBLE_REAL_VALUE (x
);
4700 /* For XFmode constants, try to find a special 80387 instruction when
4701 optimizing for size or on those CPUs that benefit from them. */
4703 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
4707 if (! ext_80387_constants_init
)
4708 init_ext_80387_constants ();
4710 for (i
= 0; i
< 5; i
++)
4711 if (real_identical (r
, &ext_80387_constants_table
[i
]))
4715 /* Load of the constant -0.0 or -1.0 will be split as
4716 fldz;fchs or fld1;fchs sequence. */
4717 if (real_isnegzero (r
))
4719 if (real_identical (r
, &dconstm1
))
4725 /* Return the opcode of the special instruction to be used to load
4729 standard_80387_constant_opcode (rtx x
)
4731 switch (standard_80387_constant_p (x
))
4755 /* Return the CONST_DOUBLE representing the 80387 constant that is
4756 loaded by the specified special instruction. The argument IDX
4757 matches the return value from standard_80387_constant_p. */
4760 standard_80387_constant_rtx (int idx
)
4764 if (! ext_80387_constants_init
)
4765 init_ext_80387_constants ();
4781 return const_double_from_real_value (ext_80387_constants_table
[i
],
4785 /* Return 1 if X is all bits 0 and 2 if X is all bits 1
4786 in supported SSE/AVX vector mode. */
4789 standard_sse_constant_p (rtx x
, machine_mode pred_mode
)
4796 mode
= GET_MODE (x
);
4798 if (x
== const0_rtx
|| const0_operand (x
, mode
))
4801 if (x
== constm1_rtx
|| vector_all_ones_operand (x
, mode
))
4803 /* VOIDmode integer constant, get mode from the predicate. */
4804 if (mode
== VOIDmode
)
4807 switch (GET_MODE_SIZE (mode
))
4832 /* Return the opcode of the special instruction to be used to load
4833 the constant operands[1] into operands[0]. */
4836 standard_sse_constant_opcode (rtx_insn
*insn
, rtx
*operands
)
4839 rtx x
= operands
[1];
4841 gcc_assert (TARGET_SSE
);
4843 mode
= GET_MODE (x
);
4845 if (x
== const0_rtx
|| const0_operand (x
, mode
))
4847 switch (get_attr_mode (insn
))
4850 if (!EXT_REX_SSE_REG_P (operands
[0]))
4851 return "%vpxor\t%0, %d0";
4855 if (EXT_REX_SSE_REG_P (operands
[0]))
4856 return (TARGET_AVX512VL
4857 ? "vpxord\t%x0, %x0, %x0"
4858 : "vpxord\t%g0, %g0, %g0");
4859 return "vpxor\t%x0, %x0, %x0";
4862 if (!EXT_REX_SSE_REG_P (operands
[0]))
4863 return "%vxorpd\t%0, %d0";
4867 if (!EXT_REX_SSE_REG_P (operands
[0]))
4868 return "vxorpd\t%x0, %x0, %x0";
4869 else if (TARGET_AVX512DQ
)
4870 return (TARGET_AVX512VL
4871 ? "vxorpd\t%x0, %x0, %x0"
4872 : "vxorpd\t%g0, %g0, %g0");
4874 return (TARGET_AVX512VL
4875 ? "vpxorq\t%x0, %x0, %x0"
4876 : "vpxorq\t%g0, %g0, %g0");
4879 if (!EXT_REX_SSE_REG_P (operands
[0]))
4880 return "%vxorps\t%0, %d0";
4884 if (!EXT_REX_SSE_REG_P (operands
[0]))
4885 return "vxorps\t%x0, %x0, %x0";
4886 else if (TARGET_AVX512DQ
)
4887 return (TARGET_AVX512VL
4888 ? "vxorps\t%x0, %x0, %x0"
4889 : "vxorps\t%g0, %g0, %g0");
4891 return (TARGET_AVX512VL
4892 ? "vpxord\t%x0, %x0, %x0"
4893 : "vpxord\t%g0, %g0, %g0");
4899 else if (x
== constm1_rtx
|| vector_all_ones_operand (x
, mode
))
4901 enum attr_mode insn_mode
= get_attr_mode (insn
);
4908 gcc_assert (TARGET_AVX512F
);
4909 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
4914 gcc_assert (TARGET_AVX2
);
4919 gcc_assert (TARGET_SSE2
);
4920 if (!EXT_REX_SSE_REG_P (operands
[0]))
4922 ? "vpcmpeqd\t%0, %0, %0"
4923 : "pcmpeqd\t%0, %0");
4924 else if (TARGET_AVX512VL
)
4925 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
4927 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
4937 /* Returns true if INSN can be transformed from a memory load
4938 to a supported FP constant load. */
4941 ix86_standard_x87sse_constant_load_p (const rtx_insn
*insn
, rtx dst
)
4943 rtx src
= find_constant_src (insn
);
4945 gcc_assert (REG_P (dst
));
4948 || (SSE_REGNO_P (REGNO (dst
))
4949 && standard_sse_constant_p (src
, GET_MODE (dst
)) != 1)
4950 || (STACK_REGNO_P (REGNO (dst
))
4951 && standard_80387_constant_p (src
) < 1))
4957 /* Returns true if OP contains a symbol reference */
4960 symbolic_reference_mentioned_p (rtx op
)
4965 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
4968 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
4969 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
4975 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
4976 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
4980 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
4987 /* Return true if it is appropriate to emit `ret' instructions in the
4988 body of a function. Do this only if the epilogue is simple, needing a
4989 couple of insns. Prior to reloading, we can't tell how many registers
4990 must be saved, so return false then. Return false if there is no frame
4991 marker to de-allocate. */
4994 ix86_can_use_return_insn_p (void)
4996 if (ix86_function_naked (current_function_decl
))
4999 /* Don't use `ret' instruction in interrupt handler. */
5000 if (! reload_completed
5001 || frame_pointer_needed
5002 || cfun
->machine
->func_type
!= TYPE_NORMAL
)
5005 /* Don't allow more than 32k pop, since that's all we can do
5006 with one instruction. */
5007 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
5010 struct ix86_frame
&frame
= cfun
->machine
->frame
;
5011 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
5012 && (frame
.nregs
+ frame
.nsseregs
) == 0);
5015 /* Value should be nonzero if functions must have frame pointers.
5016 Zero means the frame pointer need not be set up (and parms may
5017 be accessed via the stack pointer) in functions that seem suitable. */
5020 ix86_frame_pointer_required (void)
5022 /* If we accessed previous frames, then the generated code expects
5023 to be able to access the saved ebp value in our frame. */
5024 if (cfun
->machine
->accesses_prev_frame
)
5027 /* Several x86 os'es need a frame pointer for other reasons,
5028 usually pertaining to setjmp. */
5029 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5032 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
5033 if (TARGET_32BIT_MS_ABI
&& cfun
->calls_setjmp
)
5036 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
5037 allocation is 4GB. */
5038 if (TARGET_64BIT_MS_ABI
&& get_frame_size () > SEH_MAX_FRAME_SIZE
)
5041 /* SSE saves require frame-pointer when stack is misaligned. */
5042 if (TARGET_64BIT_MS_ABI
&& ix86_incoming_stack_boundary
< 128)
5045 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
5046 turns off the frame pointer by default. Turn it back on now if
5047 we've not got a leaf function. */
5048 if (TARGET_OMIT_LEAF_FRAME_POINTER
5050 || ix86_current_function_calls_tls_descriptor
))
5053 if (crtl
->profile
&& !flag_fentry
)
5059 /* Record that the current function accesses previous call frames. */
5062 ix86_setup_frame_addresses (void)
5064 cfun
->machine
->accesses_prev_frame
= 1;
5067 #ifndef USE_HIDDEN_LINKONCE
5068 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
5069 # define USE_HIDDEN_LINKONCE 1
5071 # define USE_HIDDEN_LINKONCE 0
5075 /* Label count for call and return thunks. It is used to make unique
5076 labels in call and return thunks. */
5077 static int indirectlabelno
;
5079 /* True if call thunk function is needed. */
5080 static bool indirect_thunk_needed
= false;
5082 /* Bit masks of integer registers, which contain branch target, used
5083 by call thunk functions. */
5084 static int indirect_thunks_used
;
5086 /* True if return thunk function is needed. */
5087 static bool indirect_return_needed
= false;
5089 /* True if return thunk function via CX is needed. */
5090 static bool indirect_return_via_cx
;
5092 #ifndef INDIRECT_LABEL
5093 # define INDIRECT_LABEL "LIND"
5096 /* Indicate what prefix is needed for an indirect branch. */
5097 enum indirect_thunk_prefix
5099 indirect_thunk_prefix_none
,
5100 indirect_thunk_prefix_nt
5103 /* Return the prefix needed for an indirect branch INSN. */
5105 enum indirect_thunk_prefix
5106 indirect_thunk_need_prefix (rtx_insn
*insn
)
5108 enum indirect_thunk_prefix need_prefix
;
5109 if ((cfun
->machine
->indirect_branch_type
5110 == indirect_branch_thunk_extern
)
5111 && ix86_notrack_prefixed_insn_p (insn
))
5113 /* NOTRACK prefix is only used with external thunk so that it
5114 can be properly updated to support CET at run-time. */
5115 need_prefix
= indirect_thunk_prefix_nt
;
5118 need_prefix
= indirect_thunk_prefix_none
;
5122 /* Fills in the label name that should be used for the indirect thunk. */
5125 indirect_thunk_name (char name
[32], unsigned int regno
,
5126 enum indirect_thunk_prefix need_prefix
,
5129 if (regno
!= INVALID_REGNUM
&& regno
!= CX_REG
&& ret_p
)
5132 if (USE_HIDDEN_LINKONCE
)
5136 if (need_prefix
== indirect_thunk_prefix_nt
5137 && regno
!= INVALID_REGNUM
)
5139 /* NOTRACK prefix is only used with external thunk via
5140 register so that NOTRACK prefix can be added to indirect
5141 branch via register to support CET at run-time. */
5147 const char *ret
= ret_p
? "return" : "indirect";
5149 if (regno
!= INVALID_REGNUM
)
5151 const char *reg_prefix
;
5152 if (LEGACY_INT_REGNO_P (regno
))
5153 reg_prefix
= TARGET_64BIT
? "r" : "e";
5156 sprintf (name
, "__x86_%s_thunk%s_%s%s",
5157 ret
, prefix
, reg_prefix
, reg_names
[regno
]);
5160 sprintf (name
, "__x86_%s_thunk%s", ret
, prefix
);
5164 if (regno
!= INVALID_REGNUM
)
5165 ASM_GENERATE_INTERNAL_LABEL (name
, "LITR", regno
);
5169 ASM_GENERATE_INTERNAL_LABEL (name
, "LRT", 0);
5171 ASM_GENERATE_INTERNAL_LABEL (name
, "LIT", 0);
5176 /* Output a call and return thunk for indirect branch. If REGNO != -1,
5177 the function address is in REGNO and the call and return thunk looks like:
5188 Otherwise, the function address is on the top of stack and the
5189 call and return thunk looks like:
5197 lea WORD_SIZE(%sp), %sp
5202 output_indirect_thunk (unsigned int regno
)
5204 char indirectlabel1
[32];
5205 char indirectlabel2
[32];
5207 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1
, INDIRECT_LABEL
,
5209 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2
, INDIRECT_LABEL
,
5213 fputs ("\tcall\t", asm_out_file
);
5214 assemble_name_raw (asm_out_file
, indirectlabel2
);
5215 fputc ('\n', asm_out_file
);
5217 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel1
);
5219 /* AMD and Intel CPUs prefer each a different instruction as loop filler.
5220 Usage of both pause + lfence is compromise solution. */
5221 fprintf (asm_out_file
, "\tpause\n\tlfence\n");
5224 fputs ("\tjmp\t", asm_out_file
);
5225 assemble_name_raw (asm_out_file
, indirectlabel1
);
5226 fputc ('\n', asm_out_file
);
5228 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel2
);
5230 /* The above call insn pushed a word to stack. Adjust CFI info. */
5231 if (flag_asynchronous_unwind_tables
&& dwarf2out_do_frame ())
5233 if (! dwarf2out_do_cfi_asm ())
5235 dw_cfi_ref xcfi
= ggc_cleared_alloc
<dw_cfi_node
> ();
5236 xcfi
->dw_cfi_opc
= DW_CFA_advance_loc4
;
5237 xcfi
->dw_cfi_oprnd1
.dw_cfi_addr
= ggc_strdup (indirectlabel2
);
5238 vec_safe_push (cfun
->fde
->dw_fde_cfi
, xcfi
);
5240 dw_cfi_ref xcfi
= ggc_cleared_alloc
<dw_cfi_node
> ();
5241 xcfi
->dw_cfi_opc
= DW_CFA_def_cfa_offset
;
5242 xcfi
->dw_cfi_oprnd1
.dw_cfi_offset
= 2 * UNITS_PER_WORD
;
5243 vec_safe_push (cfun
->fde
->dw_fde_cfi
, xcfi
);
5244 dwarf2out_emit_cfi (xcfi
);
5247 if (regno
!= INVALID_REGNUM
)
5251 xops
[0] = gen_rtx_MEM (word_mode
, stack_pointer_rtx
);
5252 xops
[1] = gen_rtx_REG (word_mode
, regno
);
5253 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops
);
5259 xops
[0] = stack_pointer_rtx
;
5260 xops
[1] = plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
5261 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops
);
5264 fputs ("\tret\n", asm_out_file
);
5267 /* Output a funtion with a call and return thunk for indirect branch.
5268 If REGNO != INVALID_REGNUM, the function address is in REGNO.
5269 Otherwise, the function address is on the top of stack. Thunk is
5270 used for function return if RET_P is true. */
5273 output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix
,
5274 unsigned int regno
, bool ret_p
)
5279 /* Create __x86_indirect_thunk. */
5280 indirect_thunk_name (name
, regno
, need_prefix
, ret_p
);
5281 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
5282 get_identifier (name
),
5283 build_function_type_list (void_type_node
, NULL_TREE
));
5284 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
5285 NULL_TREE
, void_type_node
);
5286 TREE_PUBLIC (decl
) = 1;
5287 TREE_STATIC (decl
) = 1;
5288 DECL_IGNORED_P (decl
) = 1;
5293 switch_to_section (darwin_sections
[picbase_thunk_section
]);
5294 fputs ("\t.weak_definition\t", asm_out_file
);
5295 assemble_name (asm_out_file
, name
);
5296 fputs ("\n\t.private_extern\t", asm_out_file
);
5297 assemble_name (asm_out_file
, name
);
5298 putc ('\n', asm_out_file
);
5299 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5300 DECL_WEAK (decl
) = 1;
5304 if (USE_HIDDEN_LINKONCE
)
5306 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
5308 targetm
.asm_out
.unique_section (decl
, 0);
5309 switch_to_section (get_named_section (decl
, NULL
, 0));
5311 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
5312 fputs ("\t.hidden\t", asm_out_file
);
5313 assemble_name (asm_out_file
, name
);
5314 putc ('\n', asm_out_file
);
5315 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5319 switch_to_section (text_section
);
5320 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5323 DECL_INITIAL (decl
) = make_node (BLOCK
);
5324 current_function_decl
= decl
;
5325 allocate_struct_function (decl
, false);
5326 init_function_start (decl
);
5327 /* We're about to hide the function body from callees of final_* by
5328 emitting it directly; tell them we're a thunk, if they care. */
5329 cfun
->is_thunk
= true;
5330 first_function_block_is_cold
= false;
5331 /* Make sure unwind info is emitted for the thunk if needed. */
5332 final_start_function (emit_barrier (), asm_out_file
, 1);
5334 output_indirect_thunk (regno
);
5336 final_end_function ();
5337 init_insn_lengths ();
5338 free_after_compilation (cfun
);
5340 current_function_decl
= NULL
;
5343 static int pic_labels_used
;
5345 /* Fills in the label name that should be used for a pc thunk for
5346 the given register. */
5349 get_pc_thunk_name (char name
[32], unsigned int regno
)
5351 gcc_assert (!TARGET_64BIT
);
5353 if (USE_HIDDEN_LINKONCE
)
5354 sprintf (name
, "__x86.get_pc_thunk.%s", reg_names
[regno
]);
5356 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5360 /* This function generates code for -fpic that loads %ebx with
5361 the return address of the caller and then returns. */
5364 ix86_code_end (void)
5369 if (indirect_return_needed
)
5370 output_indirect_thunk_function (indirect_thunk_prefix_none
,
5371 INVALID_REGNUM
, true);
5372 if (indirect_return_via_cx
)
5373 output_indirect_thunk_function (indirect_thunk_prefix_none
,
5375 if (indirect_thunk_needed
)
5376 output_indirect_thunk_function (indirect_thunk_prefix_none
,
5377 INVALID_REGNUM
, false);
5379 for (regno
= FIRST_REX_INT_REG
; regno
<= LAST_REX_INT_REG
; regno
++)
5381 unsigned int i
= regno
- FIRST_REX_INT_REG
+ LAST_INT_REG
+ 1;
5382 if ((indirect_thunks_used
& (1 << i
)))
5383 output_indirect_thunk_function (indirect_thunk_prefix_none
,
5387 for (regno
= FIRST_INT_REG
; regno
<= LAST_INT_REG
; regno
++)
5392 if ((indirect_thunks_used
& (1 << regno
)))
5393 output_indirect_thunk_function (indirect_thunk_prefix_none
,
5396 if (!(pic_labels_used
& (1 << regno
)))
5399 get_pc_thunk_name (name
, regno
);
5401 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
5402 get_identifier (name
),
5403 build_function_type_list (void_type_node
, NULL_TREE
));
5404 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
5405 NULL_TREE
, void_type_node
);
5406 TREE_PUBLIC (decl
) = 1;
5407 TREE_STATIC (decl
) = 1;
5408 DECL_IGNORED_P (decl
) = 1;
5413 switch_to_section (darwin_sections
[picbase_thunk_section
]);
5414 fputs ("\t.weak_definition\t", asm_out_file
);
5415 assemble_name (asm_out_file
, name
);
5416 fputs ("\n\t.private_extern\t", asm_out_file
);
5417 assemble_name (asm_out_file
, name
);
5418 putc ('\n', asm_out_file
);
5419 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5420 DECL_WEAK (decl
) = 1;
5424 if (USE_HIDDEN_LINKONCE
)
5426 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
5428 targetm
.asm_out
.unique_section (decl
, 0);
5429 switch_to_section (get_named_section (decl
, NULL
, 0));
5431 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
5432 fputs ("\t.hidden\t", asm_out_file
);
5433 assemble_name (asm_out_file
, name
);
5434 putc ('\n', asm_out_file
);
5435 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5439 switch_to_section (text_section
);
5440 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5443 DECL_INITIAL (decl
) = make_node (BLOCK
);
5444 current_function_decl
= decl
;
5445 allocate_struct_function (decl
, false);
5446 init_function_start (decl
);
5447 /* We're about to hide the function body from callees of final_* by
5448 emitting it directly; tell them we're a thunk, if they care. */
5449 cfun
->is_thunk
= true;
5450 first_function_block_is_cold
= false;
5451 /* Make sure unwind info is emitted for the thunk if needed. */
5452 final_start_function (emit_barrier (), asm_out_file
, 1);
5454 /* Pad stack IP move with 4 instructions (two NOPs count
5455 as one instruction). */
5456 if (TARGET_PAD_SHORT_FUNCTION
)
5461 fputs ("\tnop\n", asm_out_file
);
5464 xops
[0] = gen_rtx_REG (Pmode
, regno
);
5465 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
5466 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
5467 output_asm_insn ("%!ret", NULL
);
5468 final_end_function ();
5469 init_insn_lengths ();
5470 free_after_compilation (cfun
);
5472 current_function_decl
= NULL
;
5475 if (flag_split_stack
)
5476 file_end_indicate_split_stack ();
5479 /* Emit code for the SET_GOT patterns. */
5482 output_set_got (rtx dest
, rtx label
)
5488 if (TARGET_VXWORKS_RTP
&& flag_pic
)
5490 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5491 xops
[2] = gen_rtx_MEM (Pmode
,
5492 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
5493 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5495 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5496 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5497 an unadorned address. */
5498 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5499 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
5500 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
5504 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5509 get_pc_thunk_name (name
, REGNO (dest
));
5510 pic_labels_used
|= 1 << REGNO (dest
);
5512 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5513 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5514 output_asm_insn ("%!call\t%X2", xops
);
5517 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
5518 This is what will be referenced by the Mach-O PIC subsystem. */
5519 if (machopic_should_output_picbase_label () || !label
)
5520 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
5522 /* When we are restoring the pic base at the site of a nonlocal label,
5523 and we decided to emit the pic base above, we will still output a
5524 local label used for calculating the correction offset (even though
5525 the offset will be 0 in that case). */
5527 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5528 CODE_LABEL_NUMBER (label
));
5534 /* We don't need a pic base, we're not producing pic. */
5537 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5538 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
5539 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5540 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5544 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
5549 /* Generate an "push" pattern for input ARG. */
5554 struct machine_function
*m
= cfun
->machine
;
5556 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
5557 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
5558 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
5560 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
5561 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
5563 return gen_rtx_SET (gen_rtx_MEM (word_mode
,
5564 gen_rtx_PRE_DEC (Pmode
,
5565 stack_pointer_rtx
)),
5569 /* Generate an "pop" pattern for input ARG. */
5574 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
5575 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
5577 return gen_rtx_SET (arg
,
5578 gen_rtx_MEM (word_mode
,
5579 gen_rtx_POST_INC (Pmode
,
5580 stack_pointer_rtx
)));
5583 /* Return >= 0 if there is an unused call-clobbered register available
5584 for the entire function. */
5587 ix86_select_alt_pic_regnum (void)
5589 if (ix86_use_pseudo_pic_reg ())
5590 return INVALID_REGNUM
;
5594 && !ix86_current_function_calls_tls_descriptor
)
5597 /* Can't use the same register for both PIC and DRAP. */
5599 drap
= REGNO (crtl
->drap_reg
);
5602 for (i
= 2; i
>= 0; --i
)
5603 if (i
!= drap
&& !df_regs_ever_live_p (i
))
5607 return INVALID_REGNUM
;
5610 /* Return true if REGNO is used by the epilogue. */
5613 ix86_epilogue_uses (int regno
)
5615 /* If there are no caller-saved registers, we preserve all registers,
5616 except for MMX and x87 registers which aren't supported when saving
5617 and restoring registers. Don't explicitly save SP register since
5618 it is always preserved. */
5619 return (epilogue_completed
5620 && cfun
->machine
->no_caller_saved_registers
5621 && !fixed_regs
[regno
]
5622 && !STACK_REGNO_P (regno
)
5623 && !MMX_REGNO_P (regno
));
5626 /* Return nonzero if register REGNO can be used as a scratch register
5630 ix86_hard_regno_scratch_ok (unsigned int regno
)
5632 /* If there are no caller-saved registers, we can't use any register
5633 as a scratch register after epilogue and use REGNO as scratch
5634 register only if it has been used before to avoid saving and
5636 return (!cfun
->machine
->no_caller_saved_registers
5637 || (!epilogue_completed
5638 && df_regs_ever_live_p (regno
)));
5641 /* Return TRUE if we need to save REGNO. */
5644 ix86_save_reg (unsigned int regno
, bool maybe_eh_return
, bool ignore_outlined
)
5646 /* If there are no caller-saved registers, we preserve all registers,
5647 except for MMX and x87 registers which aren't supported when saving
5648 and restoring registers. Don't explicitly save SP register since
5649 it is always preserved. */
5650 if (cfun
->machine
->no_caller_saved_registers
)
5652 /* Don't preserve registers used for function return value. */
5653 rtx reg
= crtl
->return_rtx
;
5656 unsigned int i
= REGNO (reg
);
5657 unsigned int nregs
= REG_NREGS (reg
);
5659 if ((i
+ nregs
) == regno
)
5663 return (df_regs_ever_live_p (regno
)
5664 && !fixed_regs
[regno
]
5665 && !STACK_REGNO_P (regno
)
5666 && !MMX_REGNO_P (regno
)
5667 && (regno
!= HARD_FRAME_POINTER_REGNUM
5668 || !frame_pointer_needed
));
5671 if (regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5672 && pic_offset_table_rtx
)
5674 if (ix86_use_pseudo_pic_reg ())
5676 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
5677 _mcount in prologue. */
5678 if (!TARGET_64BIT
&& flag_pic
&& crtl
->profile
)
5681 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
5683 || crtl
->calls_eh_return
5684 || crtl
->uses_const_pool
5685 || cfun
->has_nonlocal_label
)
5686 return ix86_select_alt_pic_regnum () == INVALID_REGNUM
;
5689 if (crtl
->calls_eh_return
&& maybe_eh_return
)
5694 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5695 if (test
== INVALID_REGNUM
)
5702 if (ignore_outlined
&& cfun
->machine
->call_ms2sysv
)
5704 unsigned count
= cfun
->machine
->call_ms2sysv_extra_regs
5705 + xlogue_layout::MIN_REGS
;
5706 if (xlogue_layout::is_stub_managed_reg (regno
, count
))
5711 && regno
== REGNO (crtl
->drap_reg
)
5712 && !cfun
->machine
->no_drap_save_restore
)
5715 return (df_regs_ever_live_p (regno
)
5716 && !call_used_regs
[regno
]
5717 && !fixed_regs
[regno
]
5718 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5721 /* Return number of saved general prupose registers. */
5724 ix86_nsaved_regs (void)
5729 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5730 if (GENERAL_REGNO_P (regno
) && ix86_save_reg (regno
, true, true))
5735 /* Return number of saved SSE registers. */
5738 ix86_nsaved_sseregs (void)
5743 if (!TARGET_64BIT_MS_ABI
)
5745 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5746 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true, true))
5751 /* Given FROM and TO register numbers, say whether this elimination is
5752 allowed. If stack alignment is needed, we can only replace argument
5753 pointer with hard frame pointer, or replace frame pointer with stack
5754 pointer. Otherwise, frame pointer elimination is automatically
5755 handled and all other eliminations are valid. */
5758 ix86_can_eliminate (const int from
, const int to
)
5760 if (stack_realign_fp
)
5761 return ((from
== ARG_POINTER_REGNUM
5762 && to
== HARD_FRAME_POINTER_REGNUM
)
5763 || (from
== FRAME_POINTER_REGNUM
5764 && to
== STACK_POINTER_REGNUM
));
5766 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
5769 /* Return the offset between two registers, one to be eliminated, and the other
5770 its replacement, at the start of a routine. */
5773 ix86_initial_elimination_offset (int from
, int to
)
5775 struct ix86_frame
&frame
= cfun
->machine
->frame
;
5777 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5778 return frame
.hard_frame_pointer_offset
;
5779 else if (from
== FRAME_POINTER_REGNUM
5780 && to
== HARD_FRAME_POINTER_REGNUM
)
5781 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5784 gcc_assert (to
== STACK_POINTER_REGNUM
);
5786 if (from
== ARG_POINTER_REGNUM
)
5787 return frame
.stack_pointer_offset
;
5789 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5790 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5794 /* In a dynamically-aligned function, we can't know the offset from
5795 stack pointer to frame pointer, so we must ensure that setjmp
5796 eliminates fp against the hard fp (%ebp) rather than trying to
5797 index from %esp up to the top of the frame across a gap that is
5798 of unknown (at compile-time) size. */
5800 ix86_builtin_setjmp_frame_value (void)
5802 return stack_realign_fp
? hard_frame_pointer_rtx
: virtual_stack_vars_rtx
;
5805 /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
5806 void warn_once_call_ms2sysv_xlogues (const char *feature
)
5808 static bool warned_once
= false;
5811 warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
5817 /* Return the probing interval for -fstack-clash-protection. */
5819 static HOST_WIDE_INT
5820 get_probe_interval (void)
5822 if (flag_stack_clash_protection
)
5823 return (HOST_WIDE_INT_1U
5824 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL
));
5826 return (HOST_WIDE_INT_1U
<< STACK_CHECK_PROBE_INTERVAL_EXP
);
5829 /* When using -fsplit-stack, the allocation routines set a field in
5830 the TCB to the bottom of the stack plus this much space, measured
5833 #define SPLIT_STACK_AVAILABLE 256
5835 /* Fill structure ix86_frame about frame of currently computed function. */
5838 ix86_compute_frame_layout (void)
5840 struct ix86_frame
*frame
= &cfun
->machine
->frame
;
5841 struct machine_function
*m
= cfun
->machine
;
5842 unsigned HOST_WIDE_INT stack_alignment_needed
;
5843 HOST_WIDE_INT offset
;
5844 unsigned HOST_WIDE_INT preferred_alignment
;
5845 HOST_WIDE_INT size
= get_frame_size ();
5846 HOST_WIDE_INT to_allocate
;
5848 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
5849 * ms_abi functions that call a sysv function. We now need to prune away
5850 * cases where it should be disabled. */
5851 if (TARGET_64BIT
&& m
->call_ms2sysv
)
5853 gcc_assert (TARGET_64BIT_MS_ABI
);
5854 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES
);
5855 gcc_assert (!TARGET_SEH
);
5856 gcc_assert (TARGET_SSE
);
5857 gcc_assert (!ix86_using_red_zone ());
5859 if (crtl
->calls_eh_return
)
5861 gcc_assert (!reload_completed
);
5862 m
->call_ms2sysv
= false;
5863 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
5866 else if (ix86_static_chain_on_stack
)
5868 gcc_assert (!reload_completed
);
5869 m
->call_ms2sysv
= false;
5870 warn_once_call_ms2sysv_xlogues ("static call chains");
5873 /* Finally, compute which registers the stub will manage. */
5876 unsigned count
= xlogue_layout::count_stub_managed_regs ();
5877 m
->call_ms2sysv_extra_regs
= count
- xlogue_layout::MIN_REGS
;
5878 m
->call_ms2sysv_pad_in
= 0;
5882 frame
->nregs
= ix86_nsaved_regs ();
5883 frame
->nsseregs
= ix86_nsaved_sseregs ();
5885 /* 64-bit MS ABI seem to require stack alignment to be always 16,
5886 except for function prologues, leaf functions and when the defult
5887 incoming stack boundary is overriden at command line or via
5888 force_align_arg_pointer attribute.
5890 Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
5891 at call sites, including profile function calls.
5893 if (((TARGET_64BIT_MS_ABI
|| TARGET_MACHO
)
5894 && crtl
->preferred_stack_boundary
< 128)
5895 && (!crtl
->is_leaf
|| cfun
->calls_alloca
!= 0
5896 || ix86_current_function_calls_tls_descriptor
5897 || (TARGET_MACHO
&& crtl
->profile
)
5898 || ix86_incoming_stack_boundary
< 128))
5900 crtl
->preferred_stack_boundary
= 128;
5901 crtl
->stack_alignment_needed
= 128;
5904 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
5905 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5907 gcc_assert (!size
|| stack_alignment_needed
);
5908 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5909 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
5911 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
5912 gcc_assert (TARGET_64BIT
|| !frame
->nsseregs
);
5913 if (TARGET_64BIT
&& m
->call_ms2sysv
)
5915 gcc_assert (stack_alignment_needed
>= 16);
5916 gcc_assert (!frame
->nsseregs
);
5919 /* For SEH we have to limit the amount of code movement into the prologue.
5920 At present we do this via a BLOCKAGE, at which point there's very little
5921 scheduling that can be done, which means that there's very little point
5922 in doing anything except PUSHs. */
5924 m
->use_fast_prologue_epilogue
= false;
5925 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun
)))
5927 int count
= frame
->nregs
;
5928 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
5930 /* The fast prologue uses move instead of push to save registers. This
5931 is significantly longer, but also executes faster as modern hardware
5932 can execute the moves in parallel, but can't do that for push/pop.
5934 Be careful about choosing what prologue to emit: When function takes
5935 many instructions to execute we may use slow version as well as in
5936 case function is known to be outside hot spot (this is known with
5937 feedback only). Weight the size of function by number of registers
5938 to save as it is cheap to use one or two push instructions but very
5939 slow to use many of them. */
5941 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5942 if (node
->frequency
< NODE_FREQUENCY_NORMAL
5943 || (flag_branch_probabilities
5944 && node
->frequency
< NODE_FREQUENCY_HOT
))
5945 m
->use_fast_prologue_epilogue
= false;
5947 m
->use_fast_prologue_epilogue
5948 = !expensive_function_p (count
);
5951 frame
->save_regs_using_mov
5952 = (TARGET_PROLOGUE_USING_MOVE
&& m
->use_fast_prologue_epilogue
5953 /* If static stack checking is enabled and done with probes,
5954 the registers need to be saved before allocating the frame. */
5955 && flag_stack_check
!= STATIC_BUILTIN_STACK_CHECK
);
5957 /* Skip return address and error code in exception handler. */
5958 offset
= INCOMING_FRAME_SP_OFFSET
;
5960 /* Skip pushed static chain. */
5961 if (ix86_static_chain_on_stack
)
5962 offset
+= UNITS_PER_WORD
;
5964 /* Skip saved base pointer. */
5965 if (frame_pointer_needed
)
5966 offset
+= UNITS_PER_WORD
;
5967 frame
->hfp_save_offset
= offset
;
5969 /* The traditional frame pointer location is at the top of the frame. */
5970 frame
->hard_frame_pointer_offset
= offset
;
5972 /* Register save area */
5973 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5974 frame
->reg_save_offset
= offset
;
5976 /* On SEH target, registers are pushed just before the frame pointer
5979 frame
->hard_frame_pointer_offset
= offset
;
5981 /* Calculate the size of the va-arg area (not including padding, if any). */
5982 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
5984 /* Also adjust stack_realign_offset for the largest alignment of
5985 stack slot actually used. */
5986 if (stack_realign_fp
5987 || (cfun
->machine
->max_used_stack_alignment
!= 0
5988 && (offset
% cfun
->machine
->max_used_stack_alignment
) != 0))
5990 /* We may need a 16-byte aligned stack for the remainder of the
5991 register save area, but the stack frame for the local function
5992 may require a greater alignment if using AVX/2/512. In order
5993 to avoid wasting space, we first calculate the space needed for
5994 the rest of the register saves, add that to the stack pointer,
5995 and then realign the stack to the boundary of the start of the
5996 frame for the local function. */
5997 HOST_WIDE_INT space_needed
= 0;
5998 HOST_WIDE_INT sse_reg_space_needed
= 0;
6002 if (m
->call_ms2sysv
)
6004 m
->call_ms2sysv_pad_in
= 0;
6005 space_needed
= xlogue_layout::get_instance ().get_stack_space_used ();
6008 else if (frame
->nsseregs
)
6009 /* The only ABI that has saved SSE registers (Win64) also has a
6010 16-byte aligned default stack. However, many programs violate
6011 the ABI, and Wine64 forces stack realignment to compensate. */
6012 space_needed
= frame
->nsseregs
* 16;
6014 sse_reg_space_needed
= space_needed
= ROUND_UP (space_needed
, 16);
6016 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
6017 rounding to be pedantic. */
6018 space_needed
= ROUND_UP (space_needed
+ frame
->va_arg_size
, 16);
6021 space_needed
= frame
->va_arg_size
;
6023 /* Record the allocation size required prior to the realignment AND. */
6024 frame
->stack_realign_allocate
= space_needed
;
6026 /* The re-aligned stack starts at frame->stack_realign_offset. Values
6027 before this point are not directly comparable with values below
6028 this point. Use sp_valid_at to determine if the stack pointer is
6029 valid for a given offset, fp_valid_at for the frame pointer, or
6030 choose_baseaddr to have a base register chosen for you.
6032 Note that the result of (frame->stack_realign_offset
6033 & (stack_alignment_needed - 1)) may not equal zero. */
6034 offset
= ROUND_UP (offset
+ space_needed
, stack_alignment_needed
);
6035 frame
->stack_realign_offset
= offset
- space_needed
;
6036 frame
->sse_reg_save_offset
= frame
->stack_realign_offset
6037 + sse_reg_space_needed
;
6041 frame
->stack_realign_offset
= offset
;
6043 if (TARGET_64BIT
&& m
->call_ms2sysv
)
6045 m
->call_ms2sysv_pad_in
= !!(offset
& UNITS_PER_WORD
);
6046 offset
+= xlogue_layout::get_instance ().get_stack_space_used ();
6049 /* Align and set SSE register save area. */
6050 else if (frame
->nsseregs
)
6052 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
6053 required and the DRAP re-alignment boundary is at least 16 bytes,
6054 then we want the SSE register save area properly aligned. */
6055 if (ix86_incoming_stack_boundary
>= 128
6056 || (stack_realign_drap
&& stack_alignment_needed
>= 16))
6057 offset
= ROUND_UP (offset
, 16);
6058 offset
+= frame
->nsseregs
* 16;
6060 frame
->sse_reg_save_offset
= offset
;
6061 offset
+= frame
->va_arg_size
;
6064 /* Align start of frame for local function. When a function call
6065 is removed, it may become a leaf function. But if argument may
6066 be passed on stack, we need to align the stack when there is no
6069 || frame
->va_arg_size
!= 0
6072 || (!crtl
->tail_call_emit
6073 && cfun
->machine
->outgoing_args_on_stack
)
6074 || cfun
->calls_alloca
6075 || ix86_current_function_calls_tls_descriptor
)
6076 offset
= ROUND_UP (offset
, stack_alignment_needed
);
6078 /* Frame pointer points here. */
6079 frame
->frame_pointer_offset
= offset
;
6083 /* Add outgoing arguments area. Can be skipped if we eliminated
6084 all the function calls as dead code.
6085 Skipping is however impossible when function calls alloca. Alloca
6086 expander assumes that last crtl->outgoing_args_size
6087 of stack frame are unused. */
6088 if (ACCUMULATE_OUTGOING_ARGS
6089 && (!crtl
->is_leaf
|| cfun
->calls_alloca
6090 || ix86_current_function_calls_tls_descriptor
))
6092 offset
+= crtl
->outgoing_args_size
;
6093 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
6096 frame
->outgoing_arguments_size
= 0;
6098 /* Align stack boundary. Only needed if we're calling another function
6100 if (!crtl
->is_leaf
|| cfun
->calls_alloca
6101 || ix86_current_function_calls_tls_descriptor
)
6102 offset
= ROUND_UP (offset
, preferred_alignment
);
6104 /* We've reached end of stack frame. */
6105 frame
->stack_pointer_offset
= offset
;
6107 /* Size prologue needs to allocate. */
6108 to_allocate
= offset
- frame
->sse_reg_save_offset
;
6110 if ((!to_allocate
&& frame
->nregs
<= 1)
6111 || (TARGET_64BIT
&& to_allocate
>= HOST_WIDE_INT_C (0x80000000))
6112 /* If stack clash probing needs a loop, then it needs a
6113 scratch register. But the returned register is only guaranteed
6114 to be safe to use after register saves are complete. So if
6115 stack clash protections are enabled and the allocated frame is
6116 larger than the probe interval, then use pushes to save
6117 callee saved registers. */
6118 || (flag_stack_clash_protection
&& to_allocate
> get_probe_interval ()))
6119 frame
->save_regs_using_mov
= false;
6121 if (ix86_using_red_zone ()
6122 && crtl
->sp_is_unchanging
6124 && !ix86_pc_thunk_call_expanded
6125 && !ix86_current_function_calls_tls_descriptor
)
6127 frame
->red_zone_size
= to_allocate
;
6128 if (frame
->save_regs_using_mov
)
6129 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
6130 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
6131 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
6134 frame
->red_zone_size
= 0;
6135 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
6137 /* The SEH frame pointer location is near the bottom of the frame.
6138 This is enforced by the fact that the difference between the
6139 stack pointer and the frame pointer is limited to 240 bytes in
6140 the unwind data structure. */
6145 /* If we can leave the frame pointer where it is, do so. Also, returns
6146 the establisher frame for __builtin_frame_address (0). */
6147 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
6148 if (diff
<= SEH_MAX_FRAME_SIZE
6149 && (diff
> 240 || (diff
& 15) != 0)
6150 && !crtl
->accesses_prior_frames
)
6152 /* Ideally we'd determine what portion of the local stack frame
6153 (within the constraint of the lowest 240) is most heavily used.
6154 But without that complication, simply bias the frame pointer
6155 by 128 bytes so as to maximize the amount of the local stack
6156 frame that is addressable with 8-bit offsets. */
6157 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
6162 /* This is semi-inlined memory_address_length, but simplified
6163 since we know that we're always dealing with reg+offset, and
6164 to avoid having to create and discard all that rtl. */
6167 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
6173 /* EBP and R13 cannot be encoded without an offset. */
6174 len
= (regno
== BP_REG
|| regno
== R13_REG
);
6176 else if (IN_RANGE (offset
, -128, 127))
6179 /* ESP and R12 must be encoded with a SIB byte. */
6180 if (regno
== SP_REG
|| regno
== R12_REG
)
6186 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
6187 the frame save area. The register is saved at CFA - CFA_OFFSET. */
6190 sp_valid_at (HOST_WIDE_INT cfa_offset
)
6192 const struct machine_frame_state
&fs
= cfun
->machine
->fs
;
6193 if (fs
.sp_realigned
&& cfa_offset
<= fs
.sp_realigned_offset
)
6195 /* Validate that the cfa_offset isn't in a "no-man's land". */
6196 gcc_assert (cfa_offset
<= fs
.sp_realigned_fp_last
);
6202 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
6203 the frame save area. The register is saved at CFA - CFA_OFFSET. */
6206 fp_valid_at (HOST_WIDE_INT cfa_offset
)
6208 const struct machine_frame_state
&fs
= cfun
->machine
->fs
;
6209 if (fs
.sp_realigned
&& cfa_offset
> fs
.sp_realigned_fp_last
)
6211 /* Validate that the cfa_offset isn't in a "no-man's land". */
6212 gcc_assert (cfa_offset
>= fs
.sp_realigned_offset
);
6218 /* Choose a base register based upon alignment requested, speed and/or
6222 choose_basereg (HOST_WIDE_INT cfa_offset
, rtx
&base_reg
,
6223 HOST_WIDE_INT
&base_offset
,
6224 unsigned int align_reqested
, unsigned int *align
)
6226 const struct machine_function
*m
= cfun
->machine
;
6227 unsigned int hfp_align
;
6228 unsigned int drap_align
;
6229 unsigned int sp_align
;
6230 bool hfp_ok
= fp_valid_at (cfa_offset
);
6231 bool drap_ok
= m
->fs
.drap_valid
;
6232 bool sp_ok
= sp_valid_at (cfa_offset
);
6234 hfp_align
= drap_align
= sp_align
= INCOMING_STACK_BOUNDARY
;
6236 /* Filter out any registers that don't meet the requested alignment
6240 if (m
->fs
.realigned
)
6241 hfp_align
= drap_align
= sp_align
= crtl
->stack_alignment_needed
;
6242 /* SEH unwind code does do not currently support REG_CFA_EXPRESSION
6243 notes (which we would need to use a realigned stack pointer),
6244 so disable on SEH targets. */
6245 else if (m
->fs
.sp_realigned
)
6246 sp_align
= crtl
->stack_alignment_needed
;
6248 hfp_ok
= hfp_ok
&& hfp_align
>= align_reqested
;
6249 drap_ok
= drap_ok
&& drap_align
>= align_reqested
;
6250 sp_ok
= sp_ok
&& sp_align
>= align_reqested
;
6253 if (m
->use_fast_prologue_epilogue
)
6255 /* Choose the base register most likely to allow the most scheduling
6256 opportunities. Generally FP is valid throughout the function,
6257 while DRAP must be reloaded within the epilogue. But choose either
6258 over the SP due to increased encoding size. */
6262 base_reg
= hard_frame_pointer_rtx
;
6263 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
6267 base_reg
= crtl
->drap_reg
;
6268 base_offset
= 0 - cfa_offset
;
6272 base_reg
= stack_pointer_rtx
;
6273 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
6278 HOST_WIDE_INT toffset
;
6281 /* Choose the base register with the smallest address encoding.
6282 With a tie, choose FP > DRAP > SP. */
6285 base_reg
= stack_pointer_rtx
;
6286 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
6287 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
6291 toffset
= 0 - cfa_offset
;
6292 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
6295 base_reg
= crtl
->drap_reg
;
6296 base_offset
= toffset
;
6302 toffset
= m
->fs
.fp_offset
- cfa_offset
;
6303 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
6306 base_reg
= hard_frame_pointer_rtx
;
6307 base_offset
= toffset
;
6313 /* Set the align return value. */
6316 if (base_reg
== stack_pointer_rtx
)
6318 else if (base_reg
== crtl
->drap_reg
)
6319 *align
= drap_align
;
6320 else if (base_reg
== hard_frame_pointer_rtx
)
6325 /* Return an RTX that points to CFA_OFFSET within the stack frame and
6326 the alignment of address. If ALIGN is non-null, it should point to
6327 an alignment value (in bits) that is preferred or zero and will
6328 recieve the alignment of the base register that was selected,
6329 irrespective of rather or not CFA_OFFSET is a multiple of that
6330 alignment value. If it is possible for the base register offset to be
6331 non-immediate then SCRATCH_REGNO should specify a scratch register to
6334 The valid base registers are taken from CFUN->MACHINE->FS. */
6337 choose_baseaddr (HOST_WIDE_INT cfa_offset
, unsigned int *align
,
6338 unsigned int scratch_regno
= INVALID_REGNUM
)
6340 rtx base_reg
= NULL
;
6341 HOST_WIDE_INT base_offset
= 0;
6343 /* If a specific alignment is requested, try to get a base register
6344 with that alignment first. */
6345 if (align
&& *align
)
6346 choose_basereg (cfa_offset
, base_reg
, base_offset
, *align
, align
);
6349 choose_basereg (cfa_offset
, base_reg
, base_offset
, 0, align
);
6351 gcc_assert (base_reg
!= NULL
);
6353 rtx base_offset_rtx
= GEN_INT (base_offset
);
6355 if (!x86_64_immediate_operand (base_offset_rtx
, Pmode
))
6357 gcc_assert (scratch_regno
!= INVALID_REGNUM
);
6359 rtx scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
6360 emit_move_insn (scratch_reg
, base_offset_rtx
);
6362 return gen_rtx_PLUS (Pmode
, base_reg
, scratch_reg
);
6365 return plus_constant (Pmode
, base_reg
, base_offset
);
6368 /* Emit code to save registers in the prologue. */
6371 ix86_emit_save_regs (void)
6376 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
6377 if (GENERAL_REGNO_P (regno
) && ix86_save_reg (regno
, true, true))
6379 insn
= emit_insn (gen_push (gen_rtx_REG (word_mode
, regno
)));
6380 RTX_FRAME_RELATED_P (insn
) = 1;
6384 /* Emit a single register save at CFA - CFA_OFFSET. */
6387 ix86_emit_save_reg_using_mov (machine_mode mode
, unsigned int regno
,
6388 HOST_WIDE_INT cfa_offset
)
6390 struct machine_function
*m
= cfun
->machine
;
6391 rtx reg
= gen_rtx_REG (mode
, regno
);
6392 rtx mem
, addr
, base
, insn
;
6393 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
6395 addr
= choose_baseaddr (cfa_offset
, &align
);
6396 mem
= gen_frame_mem (mode
, addr
);
6398 /* The location aligment depends upon the base register. */
6399 align
= MIN (GET_MODE_ALIGNMENT (mode
), align
);
6400 gcc_assert (! (cfa_offset
& (align
/ BITS_PER_UNIT
- 1)));
6401 set_mem_align (mem
, align
);
6403 insn
= emit_insn (gen_rtx_SET (mem
, reg
));
6404 RTX_FRAME_RELATED_P (insn
) = 1;
6407 if (GET_CODE (base
) == PLUS
)
6408 base
= XEXP (base
, 0);
6409 gcc_checking_assert (REG_P (base
));
6411 /* When saving registers into a re-aligned local stack frame, avoid
6412 any tricky guessing by dwarf2out. */
6413 if (m
->fs
.realigned
)
6415 gcc_checking_assert (stack_realign_drap
);
6417 if (regno
== REGNO (crtl
->drap_reg
))
6419 /* A bit of a hack. We force the DRAP register to be saved in
6420 the re-aligned stack frame, which provides us with a copy
6421 of the CFA that will last past the prologue. Install it. */
6422 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
6423 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
6424 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
6425 mem
= gen_rtx_MEM (mode
, addr
);
6426 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
6430 /* The frame pointer is a stable reference within the
6431 aligned frame. Use it. */
6432 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
6433 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
6434 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
6435 mem
= gen_rtx_MEM (mode
, addr
);
6436 add_reg_note (insn
, REG_CFA_EXPRESSION
, gen_rtx_SET (mem
, reg
));
6440 else if (base
== stack_pointer_rtx
&& m
->fs
.sp_realigned
6441 && cfa_offset
>= m
->fs
.sp_realigned_offset
)
6443 gcc_checking_assert (stack_realign_fp
);
6444 add_reg_note (insn
, REG_CFA_EXPRESSION
, gen_rtx_SET (mem
, reg
));
6447 /* The memory may not be relative to the current CFA register,
6448 which means that we may need to generate a new pattern for
6449 use by the unwind info. */
6450 else if (base
!= m
->fs
.cfa_reg
)
6452 addr
= plus_constant (Pmode
, m
->fs
.cfa_reg
,
6453 m
->fs
.cfa_offset
- cfa_offset
);
6454 mem
= gen_rtx_MEM (mode
, addr
);
6455 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (mem
, reg
));
6459 /* Emit code to save registers using MOV insns.
6460 First register is stored at CFA - CFA_OFFSET. */
6462 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
6466 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6467 if (GENERAL_REGNO_P (regno
) && ix86_save_reg (regno
, true, true))
6469 ix86_emit_save_reg_using_mov (word_mode
, regno
, cfa_offset
);
6470 cfa_offset
-= UNITS_PER_WORD
;
6474 /* Emit code to save SSE registers using MOV insns.
6475 First register is stored at CFA - CFA_OFFSET. */
6477 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
6481 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6482 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true, true))
6484 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
6485 cfa_offset
-= GET_MODE_SIZE (V4SFmode
);
6489 static GTY(()) rtx queued_cfa_restores
;
6491 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
6492 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
6493 Don't add the note if the previously saved value will be left untouched
6494 within stack red-zone till return, as unwinders can find the same value
6495 in the register and on the stack. */
6498 ix86_add_cfa_restore_note (rtx_insn
*insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
6500 if (!crtl
->shrink_wrapped
6501 && cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
6506 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
6507 RTX_FRAME_RELATED_P (insn
) = 1;
6511 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
6514 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
6517 ix86_add_queued_cfa_restore_notes (rtx insn
)
6520 if (!queued_cfa_restores
)
6522 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
6524 XEXP (last
, 1) = REG_NOTES (insn
);
6525 REG_NOTES (insn
) = queued_cfa_restores
;
6526 queued_cfa_restores
= NULL_RTX
;
6527 RTX_FRAME_RELATED_P (insn
) = 1;
6530 /* Expand prologue or epilogue stack adjustment.
6531 The pattern exist to put a dependency on all ebp-based memory accesses.
6532 STYLE should be negative if instructions should be marked as frame related,
6533 zero if %r11 register is live and cannot be freely used and positive
6537 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
6538 int style
, bool set_cfa
)
6540 struct machine_function
*m
= cfun
->machine
;
6542 bool add_frame_related_expr
= false;
6544 if (Pmode
== SImode
)
6545 insn
= gen_pro_epilogue_adjust_stack_si_add (dest
, src
, offset
);
6546 else if (x86_64_immediate_operand (offset
, DImode
))
6547 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, offset
);
6551 /* r11 is used by indirect sibcall return as well, set before the
6552 epilogue and used after the epilogue. */
6554 tmp
= gen_rtx_REG (DImode
, R11_REG
);
6557 gcc_assert (src
!= hard_frame_pointer_rtx
6558 && dest
!= hard_frame_pointer_rtx
);
6559 tmp
= hard_frame_pointer_rtx
;
6561 insn
= emit_insn (gen_rtx_SET (tmp
, offset
));
6563 add_frame_related_expr
= true;
6565 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, tmp
);
6568 insn
= emit_insn (insn
);
6570 ix86_add_queued_cfa_restore_notes (insn
);
6576 gcc_assert (m
->fs
.cfa_reg
== src
);
6577 m
->fs
.cfa_offset
+= INTVAL (offset
);
6578 m
->fs
.cfa_reg
= dest
;
6580 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
6581 r
= gen_rtx_SET (dest
, r
);
6582 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
6583 RTX_FRAME_RELATED_P (insn
) = 1;
6587 RTX_FRAME_RELATED_P (insn
) = 1;
6588 if (add_frame_related_expr
)
6590 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
6591 r
= gen_rtx_SET (dest
, r
);
6592 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
6596 if (dest
== stack_pointer_rtx
)
6598 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
6599 bool valid
= m
->fs
.sp_valid
;
6600 bool realigned
= m
->fs
.sp_realigned
;
6602 if (src
== hard_frame_pointer_rtx
)
6604 valid
= m
->fs
.fp_valid
;
6606 ooffset
= m
->fs
.fp_offset
;
6608 else if (src
== crtl
->drap_reg
)
6610 valid
= m
->fs
.drap_valid
;
6616 /* Else there are two possibilities: SP itself, which we set
6617 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
6618 taken care of this by hand along the eh_return path. */
6619 gcc_checking_assert (src
== stack_pointer_rtx
6620 || offset
== const0_rtx
);
6623 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
6624 m
->fs
.sp_valid
= valid
;
6625 m
->fs
.sp_realigned
= realigned
;
6630 /* Find an available register to be used as dynamic realign argument
6631 pointer regsiter. Such a register will be written in prologue and
6632 used in begin of body, so it must not be
6633 1. parameter passing register.
6635 We reuse static-chain register if it is available. Otherwise, we
6636 use DI for i386 and R13 for x86-64. We chose R13 since it has
6639 Return: the regno of chosen register. */
6642 find_drap_reg (void)
6644 tree decl
= cfun
->decl
;
6646 /* Always use callee-saved register if there are no caller-saved
6650 /* Use R13 for nested function or function need static chain.
6651 Since function with tail call may use any caller-saved
6652 registers in epilogue, DRAP must not use caller-saved
6653 register in such case. */
6654 if (DECL_STATIC_CHAIN (decl
)
6655 || cfun
->machine
->no_caller_saved_registers
6656 || crtl
->tail_call_emit
)
6663 /* Use DI for nested function or function need static chain.
6664 Since function with tail call may use any caller-saved
6665 registers in epilogue, DRAP must not use caller-saved
6666 register in such case. */
6667 if (DECL_STATIC_CHAIN (decl
)
6668 || cfun
->machine
->no_caller_saved_registers
6669 || crtl
->tail_call_emit
)
6672 /* Reuse static chain register if it isn't used for parameter
6674 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
6676 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
6677 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
6684 /* Return minimum incoming stack alignment. */
6687 ix86_minimum_incoming_stack_boundary (bool sibcall
)
6689 unsigned int incoming_stack_boundary
;
6691 /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
6692 if (cfun
->machine
->func_type
!= TYPE_NORMAL
)
6693 incoming_stack_boundary
= TARGET_64BIT
? 128 : MIN_STACK_BOUNDARY
;
6694 /* Prefer the one specified at command line. */
6695 else if (ix86_user_incoming_stack_boundary
)
6696 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
6697 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
6698 if -mstackrealign is used, it isn't used for sibcall check and
6699 estimated stack alignment is 128bit. */
6701 && ix86_force_align_arg_pointer
6702 && crtl
->stack_alignment_estimated
== 128)
6703 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
6705 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
6707 /* Incoming stack alignment can be changed on individual functions
6708 via force_align_arg_pointer attribute. We use the smallest
6709 incoming stack boundary. */
6710 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
6711 && lookup_attribute ("force_align_arg_pointer",
6712 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
6713 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
6715 /* The incoming stack frame has to be aligned at least at
6716 parm_stack_boundary. */
6717 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
6718 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
6720 /* Stack at entrance of main is aligned by runtime. We use the
6721 smallest incoming stack boundary. */
6722 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
6723 && DECL_NAME (current_function_decl
)
6724 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
6725 && DECL_FILE_SCOPE_P (current_function_decl
))
6726 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
6728 return incoming_stack_boundary
;
6731 /* Update incoming stack boundary and estimated stack alignment. */
6734 ix86_update_stack_boundary (void)
6736 ix86_incoming_stack_boundary
6737 = ix86_minimum_incoming_stack_boundary (false);
6739 /* x86_64 vararg needs 16byte stack alignment for register save area. */
6742 && crtl
->stack_alignment_estimated
< 128)
6743 crtl
->stack_alignment_estimated
= 128;
6745 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
6746 if (ix86_tls_descriptor_calls_expanded_in_cfun
6747 && crtl
->preferred_stack_boundary
< 128)
6748 crtl
->preferred_stack_boundary
= 128;
6751 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
6752 needed or an rtx for DRAP otherwise. */
6755 ix86_get_drap_rtx (void)
6757 /* We must use DRAP if there are outgoing arguments on stack and
6758 ACCUMULATE_OUTGOING_ARGS is false. */
6760 || (cfun
->machine
->outgoing_args_on_stack
6761 && !ACCUMULATE_OUTGOING_ARGS
))
6762 crtl
->need_drap
= true;
6764 if (stack_realign_drap
)
6766 /* Assign DRAP to vDRAP and returns vDRAP */
6767 unsigned int regno
= find_drap_reg ();
6770 rtx_insn
*seq
, *insn
;
6772 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
6773 crtl
->drap_reg
= arg_ptr
;
6776 drap_vreg
= copy_to_reg (arg_ptr
);
6780 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
6783 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
6784 RTX_FRAME_RELATED_P (insn
) = 1;
6792 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6795 ix86_internal_arg_pointer (void)
6797 return virtual_incoming_args_rtx
;
6800 struct scratch_reg
{
6805 /* Return a short-lived scratch register for use on function entry.
6806 In 32-bit mode, it is valid only after the registers are saved
6807 in the prologue. This register must be released by means of
6808 release_scratch_register_on_entry once it is dead. */
6811 get_scratch_register_on_entry (struct scratch_reg
*sr
)
6819 /* We always use R11 in 64-bit mode. */
6824 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
6826 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
6828 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
6829 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
6830 int regparm
= ix86_function_regparm (fntype
, decl
);
6832 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
6834 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
6835 for the static chain register. */
6836 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
6837 && drap_regno
!= AX_REG
)
6839 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
6840 for the static chain register. */
6841 else if (thiscall_p
&& !static_chain_p
&& drap_regno
!= AX_REG
)
6843 else if (regparm
< 2 && !thiscall_p
&& drap_regno
!= DX_REG
)
6845 /* ecx is the static chain register. */
6846 else if (regparm
< 3 && !fastcall_p
&& !thiscall_p
6848 && drap_regno
!= CX_REG
)
6850 else if (ix86_save_reg (BX_REG
, true, false))
6852 /* esi is the static chain register. */
6853 else if (!(regparm
== 3 && static_chain_p
)
6854 && ix86_save_reg (SI_REG
, true, false))
6856 else if (ix86_save_reg (DI_REG
, true, false))
6860 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
6865 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
6868 rtx_insn
*insn
= emit_insn (gen_push (sr
->reg
));
6869 RTX_FRAME_RELATED_P (insn
) = 1;
6873 /* Release a scratch register obtained from the preceding function.
6875 If RELEASE_VIA_POP is true, we just pop the register off the stack
6876 to release it. This is what non-Linux systems use with -fstack-check.
6878 Otherwise we use OFFSET to locate the saved register and the
6879 allocated stack space becomes part of the local frame and is
6880 deallocated by the epilogue. */
6883 release_scratch_register_on_entry (struct scratch_reg
*sr
, HOST_WIDE_INT offset
,
6884 bool release_via_pop
)
6888 if (release_via_pop
)
6890 struct machine_function
*m
= cfun
->machine
;
6891 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
6893 /* The RX FRAME_RELATED_P mechanism doesn't know about pop. */
6894 RTX_FRAME_RELATED_P (insn
) = 1;
6895 x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (UNITS_PER_WORD
));
6896 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
6897 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
6898 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
6902 rtx x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (offset
));
6903 x
= gen_rtx_SET (sr
->reg
, gen_rtx_MEM (word_mode
, x
));
6909 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
6911 This differs from the next routine in that it tries hard to prevent
6912 attacks that jump the stack guard. Thus it is never allowed to allocate
6913 more than PROBE_INTERVAL bytes of stack space without a suitable
6916 INT_REGISTERS_SAVED is true if integer registers have already been
6917 pushed on the stack. */
6920 ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size
,
6921 const bool int_registers_saved
)
6923 struct machine_function
*m
= cfun
->machine
;
6925 /* If this function does not statically allocate stack space, then
6926 no probes are needed. */
6929 /* However, the allocation of space via pushes for register
6930 saves could be viewed as allocating space, but without the
6932 if (m
->frame
.nregs
|| m
->frame
.nsseregs
|| frame_pointer_needed
)
6933 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME
, true);
6935 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME
, false);
6939 /* If we are a noreturn function, then we have to consider the
6940 possibility that we're called via a jump rather than a call.
6942 Thus we don't have the implicit probe generated by saving the
6943 return address into the stack at the call. Thus, the stack
6944 pointer could be anywhere in the guard page. The safe thing
6945 to do is emit a probe now.
6947 The probe can be avoided if we have already emitted any callee
6948 register saves into the stack or have a frame pointer (which will
6949 have been saved as well). Those saves will function as implicit
6952 ?!? This should be revamped to work like aarch64 and s390 where
6953 we track the offset from the most recent probe. Normally that
6954 offset would be zero. For a noreturn function we would reset
6955 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
6956 we just probe when we cross PROBE_INTERVAL. */
6957 if (TREE_THIS_VOLATILE (cfun
->decl
)
6958 && !(m
->frame
.nregs
|| m
->frame
.nsseregs
|| frame_pointer_needed
))
6960 /* We can safely use any register here since we're just going to push
6961 its value and immediately pop it back. But we do try and avoid
6962 argument passing registers so as not to introduce dependencies in
6963 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
6964 rtx dummy_reg
= gen_rtx_REG (word_mode
, TARGET_64BIT
? AX_REG
: SI_REG
);
6965 rtx_insn
*insn_push
= emit_insn (gen_push (dummy_reg
));
6966 rtx_insn
*insn_pop
= emit_insn (gen_pop (dummy_reg
));
6967 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
6968 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
6970 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
6971 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
6972 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
6973 add_reg_note (insn_push
, REG_CFA_ADJUST_CFA
, x
);
6974 RTX_FRAME_RELATED_P (insn_push
) = 1;
6975 x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
6976 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
6977 add_reg_note (insn_pop
, REG_CFA_ADJUST_CFA
, x
);
6978 RTX_FRAME_RELATED_P (insn_pop
) = 1;
6980 emit_insn (gen_blockage ());
6983 /* If we allocate less than the size of the guard statically,
6984 then no probing is necessary, but we do need to allocate
6986 if (size
< (1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE
)))
6988 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6989 GEN_INT (-size
), -1,
6990 m
->fs
.cfa_reg
== stack_pointer_rtx
);
6991 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME
, true);
6995 /* We're allocating a large enough stack frame that we need to
6996 emit probes. Either emit them inline or in a loop depending
6998 HOST_WIDE_INT probe_interval
= get_probe_interval ();
6999 if (size
<= 4 * probe_interval
)
7002 for (i
= probe_interval
; i
<= size
; i
+= probe_interval
)
7004 /* Allocate PROBE_INTERVAL bytes. */
7006 = pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
7007 GEN_INT (-probe_interval
), -1,
7008 m
->fs
.cfa_reg
== stack_pointer_rtx
);
7009 add_reg_note (insn
, REG_STACK_CHECK
, const0_rtx
);
7011 /* And probe at *sp. */
7012 emit_stack_probe (stack_pointer_rtx
);
7013 emit_insn (gen_blockage ());
7016 /* We need to allocate space for the residual, but we do not need
7017 to probe the residual. */
7018 HOST_WIDE_INT residual
= (i
- probe_interval
- size
);
7020 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
7021 GEN_INT (residual
), -1,
7022 m
->fs
.cfa_reg
== stack_pointer_rtx
);
7023 dump_stack_clash_frame_info (PROBE_INLINE
, residual
!= 0);
7027 /* We expect the GP registers to be saved when probes are used
7028 as the probing sequences might need a scratch register and
7029 the routine to allocate one assumes the integer registers
7030 have already been saved. */
7031 gcc_assert (int_registers_saved
);
7033 struct scratch_reg sr
;
7034 get_scratch_register_on_entry (&sr
);
7036 /* If we needed to save a register, then account for any space
7037 that was pushed (we are not going to pop the register when
7038 we do the restore). */
7040 size
-= UNITS_PER_WORD
;
7042 /* Step 1: round SIZE down to a multiple of the interval. */
7043 HOST_WIDE_INT rounded_size
= size
& -probe_interval
;
7045 /* Step 2: compute final value of the loop counter. Use lea if
7047 rtx addr
= plus_constant (Pmode
, stack_pointer_rtx
, -rounded_size
);
7049 if (address_no_seg_operand (addr
, Pmode
))
7050 insn
= emit_insn (gen_rtx_SET (sr
.reg
, addr
));
7053 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
7054 insn
= emit_insn (gen_rtx_SET (sr
.reg
,
7055 gen_rtx_PLUS (Pmode
, sr
.reg
,
7056 stack_pointer_rtx
)));
7058 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
7060 add_reg_note (insn
, REG_CFA_DEF_CFA
,
7061 plus_constant (Pmode
, sr
.reg
,
7062 m
->fs
.cfa_offset
+ rounded_size
));
7063 RTX_FRAME_RELATED_P (insn
) = 1;
7066 /* Step 3: the loop. */
7067 rtx size_rtx
= GEN_INT (rounded_size
);
7068 insn
= emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
,
7070 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
7072 m
->fs
.cfa_offset
+= rounded_size
;
7073 add_reg_note (insn
, REG_CFA_DEF_CFA
,
7074 plus_constant (Pmode
, stack_pointer_rtx
,
7076 RTX_FRAME_RELATED_P (insn
) = 1;
7078 m
->fs
.sp_offset
+= rounded_size
;
7079 emit_insn (gen_blockage ());
7081 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
7082 is equal to ROUNDED_SIZE. */
7084 if (size
!= rounded_size
)
7085 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
7086 GEN_INT (rounded_size
- size
), -1,
7087 m
->fs
.cfa_reg
== stack_pointer_rtx
);
7088 dump_stack_clash_frame_info (PROBE_LOOP
, size
!= rounded_size
);
7090 /* This does not deallocate the space reserved for the scratch
7091 register. That will be deallocated in the epilogue. */
7092 release_scratch_register_on_entry (&sr
, size
, false);
7095 /* Make sure nothing is scheduled before we are done. */
7096 emit_insn (gen_blockage ());
7099 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
7101 INT_REGISTERS_SAVED is true if integer registers have already been
7102 pushed on the stack. */
7105 ix86_adjust_stack_and_probe (HOST_WIDE_INT size
,
7106 const bool int_registers_saved
)
7108 /* We skip the probe for the first interval + a small dope of 4 words and
7109 probe that many bytes past the specified size to maintain a protection
7110 area at the botton of the stack. */
7111 const int dope
= 4 * UNITS_PER_WORD
;
7112 rtx size_rtx
= GEN_INT (size
), last
;
7114 /* See if we have a constant small number of probes to generate. If so,
7115 that's the easy case. The run-time loop is made up of 9 insns in the
7116 generic case while the compile-time loop is made up of 3+2*(n-1) insns
7117 for n # of intervals. */
7118 if (size
<= 4 * get_probe_interval ())
7120 HOST_WIDE_INT i
, adjust
;
7121 bool first_probe
= true;
7123 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
7124 values of N from 1 until it exceeds SIZE. If only one probe is
7125 needed, this will not generate any code. Then adjust and probe
7126 to PROBE_INTERVAL + SIZE. */
7127 for (i
= get_probe_interval (); i
< size
; i
+= get_probe_interval ())
7131 adjust
= 2 * get_probe_interval () + dope
;
7132 first_probe
= false;
7135 adjust
= get_probe_interval ();
7137 emit_insn (gen_rtx_SET (stack_pointer_rtx
,
7138 plus_constant (Pmode
, stack_pointer_rtx
,
7140 emit_stack_probe (stack_pointer_rtx
);
7144 adjust
= size
+ get_probe_interval () + dope
;
7146 adjust
= size
+ get_probe_interval () - i
;
7148 emit_insn (gen_rtx_SET (stack_pointer_rtx
,
7149 plus_constant (Pmode
, stack_pointer_rtx
,
7151 emit_stack_probe (stack_pointer_rtx
);
7153 /* Adjust back to account for the additional first interval. */
7154 last
= emit_insn (gen_rtx_SET (stack_pointer_rtx
,
7155 plus_constant (Pmode
, stack_pointer_rtx
,
7156 (get_probe_interval ()
7160 /* Otherwise, do the same as above, but in a loop. Note that we must be
7161 extra careful with variables wrapping around because we might be at
7162 the very top (or the very bottom) of the address space and we have
7163 to be able to handle this case properly; in particular, we use an
7164 equality test for the loop condition. */
7167 /* We expect the GP registers to be saved when probes are used
7168 as the probing sequences might need a scratch register and
7169 the routine to allocate one assumes the integer registers
7170 have already been saved. */
7171 gcc_assert (int_registers_saved
);
7173 HOST_WIDE_INT rounded_size
;
7174 struct scratch_reg sr
;
7176 get_scratch_register_on_entry (&sr
);
7178 /* If we needed to save a register, then account for any space
7179 that was pushed (we are not going to pop the register when
7180 we do the restore). */
7182 size
-= UNITS_PER_WORD
;
7184 /* Step 1: round SIZE to the previous multiple of the interval. */
7186 rounded_size
= ROUND_DOWN (size
, get_probe_interval ());
7189 /* Step 2: compute initial and final value of the loop counter. */
7191 /* SP = SP_0 + PROBE_INTERVAL. */
7192 emit_insn (gen_rtx_SET (stack_pointer_rtx
,
7193 plus_constant (Pmode
, stack_pointer_rtx
,
7194 - (get_probe_interval () + dope
))));
7196 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
7197 if (rounded_size
<= (HOST_WIDE_INT_1
<< 31))
7198 emit_insn (gen_rtx_SET (sr
.reg
,
7199 plus_constant (Pmode
, stack_pointer_rtx
,
7203 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
7204 emit_insn (gen_rtx_SET (sr
.reg
,
7205 gen_rtx_PLUS (Pmode
, sr
.reg
,
7206 stack_pointer_rtx
)));
7214 SP = SP + PROBE_INTERVAL
7217 while (SP != LAST_ADDR)
7219 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
7220 values of N from 1 until it is equal to ROUNDED_SIZE. */
7222 emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
, size_rtx
));
7225 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
7226 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
7228 if (size
!= rounded_size
)
7230 emit_insn (gen_rtx_SET (stack_pointer_rtx
,
7231 plus_constant (Pmode
, stack_pointer_rtx
,
7232 rounded_size
- size
)));
7233 emit_stack_probe (stack_pointer_rtx
);
7236 /* Adjust back to account for the additional first interval. */
7237 last
= emit_insn (gen_rtx_SET (stack_pointer_rtx
,
7238 plus_constant (Pmode
, stack_pointer_rtx
,
7239 (get_probe_interval ()
7242 /* This does not deallocate the space reserved for the scratch
7243 register. That will be deallocated in the epilogue. */
7244 release_scratch_register_on_entry (&sr
, size
, false);
7247 /* Even if the stack pointer isn't the CFA register, we need to correctly
7248 describe the adjustments made to it, in particular differentiate the
7249 frame-related ones from the frame-unrelated ones. */
7252 rtx expr
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (2));
7253 XVECEXP (expr
, 0, 0)
7254 = gen_rtx_SET (stack_pointer_rtx
,
7255 plus_constant (Pmode
, stack_pointer_rtx
, -size
));
7256 XVECEXP (expr
, 0, 1)
7257 = gen_rtx_SET (stack_pointer_rtx
,
7258 plus_constant (Pmode
, stack_pointer_rtx
,
7259 get_probe_interval () + dope
+ size
));
7260 add_reg_note (last
, REG_FRAME_RELATED_EXPR
, expr
);
7261 RTX_FRAME_RELATED_P (last
) = 1;
7263 cfun
->machine
->fs
.sp_offset
+= size
;
7266 /* Make sure nothing is scheduled before we are done. */
7267 emit_insn (gen_blockage ());
7270 /* Adjust the stack pointer up to REG while probing it. */
7273 output_adjust_stack_and_probe (rtx reg
)
7275 static int labelno
= 0;
7279 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
7282 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
7284 /* SP = SP + PROBE_INTERVAL. */
7285 xops
[0] = stack_pointer_rtx
;
7286 xops
[1] = GEN_INT (get_probe_interval ());
7287 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
7290 xops
[1] = const0_rtx
;
7291 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
7293 /* Test if SP == LAST_ADDR. */
7294 xops
[0] = stack_pointer_rtx
;
7296 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
7299 fputs ("\tjne\t", asm_out_file
);
7300 assemble_name_raw (asm_out_file
, loop_lab
);
7301 fputc ('\n', asm_out_file
);
7306 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
7307 inclusive. These are offsets from the current stack pointer.
7309 INT_REGISTERS_SAVED is true if integer registers have already been
7310 pushed on the stack. */
7313 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
7314 const bool int_registers_saved
)
7316 /* See if we have a constant small number of probes to generate. If so,
7317 that's the easy case. The run-time loop is made up of 6 insns in the
7318 generic case while the compile-time loop is made up of n insns for n #
7320 if (size
<= 6 * get_probe_interval ())
7324 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
7325 it exceeds SIZE. If only one probe is needed, this will not
7326 generate any code. Then probe at FIRST + SIZE. */
7327 for (i
= get_probe_interval (); i
< size
; i
+= get_probe_interval ())
7328 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
7331 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
7335 /* Otherwise, do the same as above, but in a loop. Note that we must be
7336 extra careful with variables wrapping around because we might be at
7337 the very top (or the very bottom) of the address space and we have
7338 to be able to handle this case properly; in particular, we use an
7339 equality test for the loop condition. */
7342 /* We expect the GP registers to be saved when probes are used
7343 as the probing sequences might need a scratch register and
7344 the routine to allocate one assumes the integer registers
7345 have already been saved. */
7346 gcc_assert (int_registers_saved
);
7348 HOST_WIDE_INT rounded_size
, last
;
7349 struct scratch_reg sr
;
7351 get_scratch_register_on_entry (&sr
);
7354 /* Step 1: round SIZE to the previous multiple of the interval. */
7356 rounded_size
= ROUND_DOWN (size
, get_probe_interval ());
7359 /* Step 2: compute initial and final value of the loop counter. */
7361 /* TEST_OFFSET = FIRST. */
7362 emit_move_insn (sr
.reg
, GEN_INT (-first
));
7364 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
7365 last
= first
+ rounded_size
;
7372 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
7375 while (TEST_ADDR != LAST_ADDR)
7377 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
7378 until it is equal to ROUNDED_SIZE. */
7380 emit_insn (ix86_gen_probe_stack_range (sr
.reg
, sr
.reg
, GEN_INT (-last
)));
7383 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
7384 that SIZE is equal to ROUNDED_SIZE. */
7386 if (size
!= rounded_size
)
7387 emit_stack_probe (plus_constant (Pmode
,
7388 gen_rtx_PLUS (Pmode
,
7391 rounded_size
- size
));
7393 release_scratch_register_on_entry (&sr
, size
, true);
7396 /* Make sure nothing is scheduled before we are done. */
7397 emit_insn (gen_blockage ());
7400 /* Probe a range of stack addresses from REG to END, inclusive. These are
7401 offsets from the current stack pointer. */
7404 output_probe_stack_range (rtx reg
, rtx end
)
7406 static int labelno
= 0;
7410 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
7413 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
7415 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
7417 xops
[1] = GEN_INT (get_probe_interval ());
7418 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
7420 /* Probe at TEST_ADDR. */
7421 xops
[0] = stack_pointer_rtx
;
7423 xops
[2] = const0_rtx
;
7424 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
7426 /* Test if TEST_ADDR == LAST_ADDR. */
7429 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
7432 fputs ("\tjne\t", asm_out_file
);
7433 assemble_name_raw (asm_out_file
, loop_lab
);
7434 fputc ('\n', asm_out_file
);
7439 /* Return true if stack frame is required. Update STACK_ALIGNMENT
7440 to the largest alignment, in bits, of stack slot used if stack
7441 frame is required and CHECK_STACK_SLOT is true. */
7444 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment
,
7445 bool check_stack_slot
)
7447 HARD_REG_SET set_up_by_prologue
, prologue_used
;
7450 CLEAR_HARD_REG_SET (prologue_used
);
7451 CLEAR_HARD_REG_SET (set_up_by_prologue
);
7452 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
7453 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
7454 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
7455 HARD_FRAME_POINTER_REGNUM
);
7457 /* The preferred stack alignment is the minimum stack alignment. */
7458 if (stack_alignment
> crtl
->preferred_stack_boundary
)
7459 stack_alignment
= crtl
->preferred_stack_boundary
;
7461 bool require_stack_frame
= false;
7463 FOR_EACH_BB_FN (bb
, cfun
)
7466 FOR_BB_INSNS (bb
, insn
)
7467 if (NONDEBUG_INSN_P (insn
)
7468 && requires_stack_frame_p (insn
, prologue_used
,
7469 set_up_by_prologue
))
7471 require_stack_frame
= true;
7473 if (check_stack_slot
)
7475 /* Find the maximum stack alignment. */
7476 subrtx_iterator::array_type array
;
7477 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
7479 && (reg_mentioned_p (stack_pointer_rtx
,
7481 || reg_mentioned_p (frame_pointer_rtx
,
7484 unsigned int alignment
= MEM_ALIGN (*iter
);
7485 if (alignment
> stack_alignment
)
7486 stack_alignment
= alignment
;
7492 return require_stack_frame
;
7495 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
7496 will guide prologue/epilogue to be generated in correct form. */
7499 ix86_finalize_stack_frame_flags (void)
7501 /* Check if stack realign is really needed after reload, and
7502 stores result in cfun */
7503 unsigned int incoming_stack_boundary
7504 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
7505 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
7506 unsigned int stack_alignment
7507 = (crtl
->is_leaf
&& !ix86_current_function_calls_tls_descriptor
7508 ? crtl
->max_used_stack_slot_alignment
7509 : crtl
->stack_alignment_needed
);
7510 unsigned int stack_realign
7511 = (incoming_stack_boundary
< stack_alignment
);
7512 bool recompute_frame_layout_p
= false;
7514 if (crtl
->stack_realign_finalized
)
7516 /* After stack_realign_needed is finalized, we can't no longer
7518 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
7522 /* If the only reason for frame_pointer_needed is that we conservatively
7523 assumed stack realignment might be needed or -fno-omit-frame-pointer
7524 is used, but in the end nothing that needed the stack alignment had
7525 been spilled nor stack access, clear frame_pointer_needed and say we
7526 don't need stack realignment. */
7527 if ((stack_realign
|| (!flag_omit_frame_pointer
&& optimize
))
7528 && frame_pointer_needed
7530 && crtl
->sp_is_unchanging
7531 && !ix86_current_function_calls_tls_descriptor
7532 && !crtl
->accesses_prior_frames
7533 && !cfun
->calls_alloca
7534 && !crtl
->calls_eh_return
7535 /* See ira_setup_eliminable_regset for the rationale. */
7536 && !(STACK_CHECK_MOVING_SP
7539 && cfun
->can_throw_non_call_exceptions
)
7540 && !ix86_frame_pointer_required ()
7541 && get_frame_size () == 0
7542 && ix86_nsaved_sseregs () == 0
7543 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
7545 if (ix86_find_max_used_stack_alignment (stack_alignment
,
7548 /* Stack frame is required. If stack alignment needed is less
7549 than incoming stack boundary, don't realign stack. */
7550 stack_realign
= incoming_stack_boundary
< stack_alignment
;
7553 crtl
->max_used_stack_slot_alignment
7554 = incoming_stack_boundary
;
7555 crtl
->stack_alignment_needed
7556 = incoming_stack_boundary
;
7557 /* Also update preferred_stack_boundary for leaf
7559 crtl
->preferred_stack_boundary
7560 = incoming_stack_boundary
;
7565 /* If drap has been set, but it actually isn't live at the
7566 start of the function, there is no reason to set it up. */
7569 basic_block bb
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
;
7570 if (! REGNO_REG_SET_P (DF_LR_IN (bb
),
7571 REGNO (crtl
->drap_reg
)))
7573 crtl
->drap_reg
= NULL_RTX
;
7574 crtl
->need_drap
= false;
7578 cfun
->machine
->no_drap_save_restore
= true;
7580 frame_pointer_needed
= false;
7581 stack_realign
= false;
7582 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
7583 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
7584 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
7585 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
7586 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
7587 df_finish_pass (true);
7588 df_scan_alloc (NULL
);
7590 df_compute_regs_ever_live (true);
7593 if (flag_var_tracking
)
7595 /* Since frame pointer is no longer available, replace it with
7596 stack pointer - UNITS_PER_WORD in debug insns. */
7598 for (ref
= DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM
);
7601 next
= DF_REF_NEXT_REG (ref
);
7602 if (!DF_REF_INSN_INFO (ref
))
7605 /* Make sure the next ref is for a different instruction,
7606 so that we're not affected by the rescan. */
7607 rtx_insn
*insn
= DF_REF_INSN (ref
);
7608 while (next
&& DF_REF_INSN (next
) == insn
)
7609 next
= DF_REF_NEXT_REG (next
);
7611 if (DEBUG_INSN_P (insn
))
7613 bool changed
= false;
7614 for (; ref
!= next
; ref
= DF_REF_NEXT_REG (ref
))
7616 rtx
*loc
= DF_REF_LOC (ref
);
7617 if (*loc
== hard_frame_pointer_rtx
)
7619 *loc
= plus_constant (Pmode
,
7626 df_insn_rescan (insn
);
7631 recompute_frame_layout_p
= true;
7634 else if (crtl
->max_used_stack_slot_alignment
>= 128)
7636 /* We don't need to realign stack. max_used_stack_alignment is
7637 used to decide how stack frame should be aligned. This is
7638 independent of any psABIs nor 32-bit vs 64-bit. It is always
7639 safe to compute max_used_stack_alignment. We compute it only
7640 if 128-bit aligned load/store may be generated on misaligned
7641 stack slot which will lead to segfault. */
7642 if (ix86_find_max_used_stack_alignment (stack_alignment
, true))
7643 cfun
->machine
->max_used_stack_alignment
7644 = stack_alignment
/ BITS_PER_UNIT
;
7647 if (crtl
->stack_realign_needed
!= stack_realign
)
7648 recompute_frame_layout_p
= true;
7649 crtl
->stack_realign_needed
= stack_realign
;
7650 crtl
->stack_realign_finalized
= true;
7651 if (recompute_frame_layout_p
)
7652 ix86_compute_frame_layout ();
7655 /* Delete SET_GOT right after entry block if it is allocated to reg. */
7658 ix86_elim_entry_set_got (rtx reg
)
7660 basic_block bb
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
;
7661 rtx_insn
*c_insn
= BB_HEAD (bb
);
7662 if (!NONDEBUG_INSN_P (c_insn
))
7663 c_insn
= next_nonnote_nondebug_insn (c_insn
);
7664 if (c_insn
&& NONJUMP_INSN_P (c_insn
))
7666 rtx pat
= PATTERN (c_insn
);
7667 if (GET_CODE (pat
) == PARALLEL
)
7669 rtx vec
= XVECEXP (pat
, 0, 0);
7670 if (GET_CODE (vec
) == SET
7671 && XINT (XEXP (vec
, 1), 1) == UNSPEC_SET_GOT
7672 && REGNO (XEXP (vec
, 0)) == REGNO (reg
))
7673 delete_insn (c_insn
);
7679 gen_frame_set (rtx reg
, rtx frame_reg
, int offset
, bool store
)
7684 addr
= gen_rtx_PLUS (Pmode
, frame_reg
, GEN_INT (offset
));
7685 mem
= gen_frame_mem (GET_MODE (reg
), offset
? addr
: frame_reg
);
7686 return gen_rtx_SET (store
? mem
: reg
, store
? reg
: mem
);
7690 gen_frame_load (rtx reg
, rtx frame_reg
, int offset
)
7692 return gen_frame_set (reg
, frame_reg
, offset
, false);
7696 gen_frame_store (rtx reg
, rtx frame_reg
, int offset
)
7698 return gen_frame_set (reg
, frame_reg
, offset
, true);
7702 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame
&frame
)
7704 struct machine_function
*m
= cfun
->machine
;
7705 const unsigned ncregs
= NUM_X86_64_MS_CLOBBERED_REGS
7706 + m
->call_ms2sysv_extra_regs
;
7707 rtvec v
= rtvec_alloc (ncregs
+ 1);
7708 unsigned int align
, i
, vi
= 0;
7711 rtx rax
= gen_rtx_REG (word_mode
, AX_REG
);
7712 const struct xlogue_layout
&xlogue
= xlogue_layout::get_instance ();
7714 /* AL should only be live with sysv_abi. */
7715 gcc_assert (!ix86_eax_live_at_start_p ());
7716 gcc_assert (m
->fs
.sp_offset
>= frame
.sse_reg_save_offset
);
7718 /* Setup RAX as the stub's base pointer. We use stack_realign_offset rather
7719 we've actually realigned the stack or not. */
7720 align
= GET_MODE_ALIGNMENT (V4SFmode
);
7721 addr
= choose_baseaddr (frame
.stack_realign_offset
7722 + xlogue
.get_stub_ptr_offset (), &align
, AX_REG
);
7723 gcc_assert (align
>= GET_MODE_ALIGNMENT (V4SFmode
));
7725 emit_insn (gen_rtx_SET (rax
, addr
));
7727 /* Get the stub symbol. */
7728 sym
= xlogue
.get_stub_rtx (frame_pointer_needed
? XLOGUE_STUB_SAVE_HFP
7729 : XLOGUE_STUB_SAVE
);
7730 RTVEC_ELT (v
, vi
++) = gen_rtx_USE (VOIDmode
, sym
);
7732 for (i
= 0; i
< ncregs
; ++i
)
7734 const xlogue_layout::reginfo
&r
= xlogue
.get_reginfo (i
);
7735 rtx reg
= gen_rtx_REG ((SSE_REGNO_P (r
.regno
) ? V4SFmode
: word_mode
),
7737 RTVEC_ELT (v
, vi
++) = gen_frame_store (reg
, rax
, -r
.offset
);
7740 gcc_assert (vi
== (unsigned)GET_NUM_ELEM (v
));
7742 insn
= emit_insn (gen_rtx_PARALLEL (VOIDmode
, v
));
7743 RTX_FRAME_RELATED_P (insn
) = true;
7746 /* Expand the prologue into a bunch of separate insns. */
7749 ix86_expand_prologue (void)
7751 struct machine_function
*m
= cfun
->machine
;
7753 HOST_WIDE_INT allocate
;
7754 bool int_registers_saved
;
7755 bool sse_registers_saved
;
7756 bool save_stub_call_needed
;
7757 rtx static_chain
= NULL_RTX
;
7759 if (ix86_function_naked (current_function_decl
))
7762 ix86_finalize_stack_frame_flags ();
7764 /* DRAP should not coexist with stack_realign_fp */
7765 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
7767 memset (&m
->fs
, 0, sizeof (m
->fs
));
7769 /* Initialize CFA state for before the prologue. */
7770 m
->fs
.cfa_reg
= stack_pointer_rtx
;
7771 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
7773 /* Track SP offset to the CFA. We continue tracking this after we've
7774 swapped the CFA register away from SP. In the case of re-alignment
7775 this is fudged; we're interested to offsets within the local frame. */
7776 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
7777 m
->fs
.sp_valid
= true;
7778 m
->fs
.sp_realigned
= false;
7780 const struct ix86_frame
&frame
= cfun
->machine
->frame
;
7782 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
7784 /* We should have already generated an error for any use of
7785 ms_hook on a nested function. */
7786 gcc_checking_assert (!ix86_static_chain_on_stack
);
7788 /* Check if profiling is active and we shall use profiling before
7789 prologue variant. If so sorry. */
7790 if (crtl
->profile
&& flag_fentry
!= 0)
7791 sorry ("%<ms_hook_prologue%> attribute is not compatible "
7792 "with %<-mfentry%> for 32-bit");
7794 /* In ix86_asm_output_function_label we emitted:
7795 8b ff movl.s %edi,%edi
7797 8b ec movl.s %esp,%ebp
7799 This matches the hookable function prologue in Win32 API
7800 functions in Microsoft Windows XP Service Pack 2 and newer.
7801 Wine uses this to enable Windows apps to hook the Win32 API
7802 functions provided by Wine.
7804 What that means is that we've already set up the frame pointer. */
7806 if (frame_pointer_needed
7807 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
7811 /* We've decided to use the frame pointer already set up.
7812 Describe this to the unwinder by pretending that both
7813 push and mov insns happen right here.
7815 Putting the unwind info here at the end of the ms_hook
7816 is done so that we can make absolutely certain we get
7817 the required byte sequence at the start of the function,
7818 rather than relying on an assembler that can produce
7819 the exact encoding required.
7821 However it does mean (in the unpatched case) that we have
7822 a 1 insn window where the asynchronous unwind info is
7823 incorrect. However, if we placed the unwind info at
7824 its correct location we would have incorrect unwind info
7825 in the patched case. Which is probably all moot since
7826 I don't expect Wine generates dwarf2 unwind info for the
7827 system libraries that use this feature. */
7829 insn
= emit_insn (gen_blockage ());
7831 push
= gen_push (hard_frame_pointer_rtx
);
7832 mov
= gen_rtx_SET (hard_frame_pointer_rtx
,
7834 RTX_FRAME_RELATED_P (push
) = 1;
7835 RTX_FRAME_RELATED_P (mov
) = 1;
7837 RTX_FRAME_RELATED_P (insn
) = 1;
7838 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
7839 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
7841 /* Note that gen_push incremented m->fs.cfa_offset, even
7842 though we didn't emit the push insn here. */
7843 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
7844 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
7845 m
->fs
.fp_valid
= true;
7849 /* The frame pointer is not needed so pop %ebp again.
7850 This leaves us with a pristine state. */
7851 emit_insn (gen_pop (hard_frame_pointer_rtx
));
7855 /* The first insn of a function that accepts its static chain on the
7856 stack is to push the register that would be filled in by a direct
7857 call. This insn will be skipped by the trampoline. */
7858 else if (ix86_static_chain_on_stack
)
7860 static_chain
= ix86_static_chain (cfun
->decl
, false);
7861 insn
= emit_insn (gen_push (static_chain
));
7862 emit_insn (gen_blockage ());
7864 /* We don't want to interpret this push insn as a register save,
7865 only as a stack adjustment. The real copy of the register as
7866 a save will be done later, if needed. */
7867 t
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
7868 t
= gen_rtx_SET (stack_pointer_rtx
, t
);
7869 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
7870 RTX_FRAME_RELATED_P (insn
) = 1;
7873 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
7874 of DRAP is needed and stack realignment is really needed after reload */
7875 if (stack_realign_drap
)
7877 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
7879 /* Can't use DRAP in interrupt function. */
7880 if (cfun
->machine
->func_type
!= TYPE_NORMAL
)
7881 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
7882 "in interrupt service routine. This may be worked "
7883 "around by avoiding functions with aggregate return.");
7885 /* Only need to push parameter pointer reg if it is caller saved. */
7886 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
7888 /* Push arg pointer reg */
7889 insn
= emit_insn (gen_push (crtl
->drap_reg
));
7890 RTX_FRAME_RELATED_P (insn
) = 1;
7893 /* Grab the argument pointer. */
7894 t
= plus_constant (Pmode
, stack_pointer_rtx
, m
->fs
.sp_offset
);
7895 insn
= emit_insn (gen_rtx_SET (crtl
->drap_reg
, t
));
7896 RTX_FRAME_RELATED_P (insn
) = 1;
7897 m
->fs
.cfa_reg
= crtl
->drap_reg
;
7898 m
->fs
.cfa_offset
= 0;
7900 /* Align the stack. */
7901 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
7903 GEN_INT (-align_bytes
)));
7904 RTX_FRAME_RELATED_P (insn
) = 1;
7906 /* Replicate the return address on the stack so that return
7907 address can be reached via (argp - 1) slot. This is needed
7908 to implement macro RETURN_ADDR_RTX and intrinsic function
7909 expand_builtin_return_addr etc. */
7910 t
= plus_constant (Pmode
, crtl
->drap_reg
, -UNITS_PER_WORD
);
7911 t
= gen_frame_mem (word_mode
, t
);
7912 insn
= emit_insn (gen_push (t
));
7913 RTX_FRAME_RELATED_P (insn
) = 1;
7915 /* For the purposes of frame and register save area addressing,
7916 we've started over with a new frame. */
7917 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
7918 m
->fs
.realigned
= true;
7922 /* Replicate static chain on the stack so that static chain
7923 can be reached via (argp - 2) slot. This is needed for
7924 nested function with stack realignment. */
7925 insn
= emit_insn (gen_push (static_chain
));
7926 RTX_FRAME_RELATED_P (insn
) = 1;
7930 int_registers_saved
= (frame
.nregs
== 0);
7931 sse_registers_saved
= (frame
.nsseregs
== 0);
7932 save_stub_call_needed
= (m
->call_ms2sysv
);
7933 gcc_assert (sse_registers_saved
|| !save_stub_call_needed
);
7935 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
7937 /* Note: AT&T enter does NOT have reversed args. Enter is probably
7938 slower on all targets. Also sdb didn't like it. */
7939 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
7940 RTX_FRAME_RELATED_P (insn
) = 1;
7942 /* Push registers now, before setting the frame pointer
7944 if (!int_registers_saved
7946 && !frame
.save_regs_using_mov
)
7948 ix86_emit_save_regs ();
7949 int_registers_saved
= true;
7950 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
7953 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
7955 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
7956 RTX_FRAME_RELATED_P (insn
) = 1;
7958 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
7959 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
7960 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
7961 m
->fs
.fp_valid
= true;
7965 if (!int_registers_saved
)
7967 /* If saving registers via PUSH, do so now. */
7968 if (!frame
.save_regs_using_mov
)
7970 ix86_emit_save_regs ();
7971 int_registers_saved
= true;
7972 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
7975 /* When using red zone we may start register saving before allocating
7976 the stack frame saving one cycle of the prologue. However, avoid
7977 doing this if we have to probe the stack; at least on x86_64 the
7978 stack probe can turn into a call that clobbers a red zone location. */
7979 else if (ix86_using_red_zone ()
7980 && (! TARGET_STACK_PROBE
7981 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
7983 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
7984 int_registers_saved
= true;
7988 if (stack_realign_fp
)
7990 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
7991 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
7993 /* Record last valid frame pointer offset. */
7994 m
->fs
.sp_realigned_fp_last
= frame
.reg_save_offset
;
7996 /* The computation of the size of the re-aligned stack frame means
7997 that we must allocate the size of the register save area before
7998 performing the actual alignment. Otherwise we cannot guarantee
7999 that there's enough storage above the realignment point. */
8000 allocate
= frame
.reg_save_offset
- m
->fs
.sp_offset
8001 + frame
.stack_realign_allocate
;
8003 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8004 GEN_INT (-allocate
), -1, false);
8006 /* Align the stack. */
8007 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
8009 GEN_INT (-align_bytes
)));
8010 m
->fs
.sp_offset
= ROUND_UP (m
->fs
.sp_offset
, align_bytes
);
8011 m
->fs
.sp_realigned_offset
= m
->fs
.sp_offset
8012 - frame
.stack_realign_allocate
;
8013 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
8014 Beyond this point, stack access should be done via choose_baseaddr or
8015 by using sp_valid_at and fp_valid_at to determine the correct base
8016 register. Henceforth, any CFA offset should be thought of as logical
8017 and not physical. */
8018 gcc_assert (m
->fs
.sp_realigned_offset
>= m
->fs
.sp_realigned_fp_last
);
8019 gcc_assert (m
->fs
.sp_realigned_offset
== frame
.stack_realign_offset
);
8020 m
->fs
.sp_realigned
= true;
8022 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
8023 is needed to describe where a register is saved using a realigned
8024 stack pointer, so we need to invalidate the stack pointer for that
8027 m
->fs
.sp_valid
= false;
8029 /* If SP offset is non-immediate after allocation of the stack frame,
8030 then emit SSE saves or stub call prior to allocating the rest of the
8031 stack frame. This is less efficient for the out-of-line stub because
8032 we can't combine allocations across the call barrier, but it's better
8033 than using a scratch register. */
8034 else if (!x86_64_immediate_operand (GEN_INT (frame
.stack_pointer_offset
8035 - m
->fs
.sp_realigned_offset
),
8038 if (!sse_registers_saved
)
8040 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
8041 sse_registers_saved
= true;
8043 else if (save_stub_call_needed
)
8045 ix86_emit_outlined_ms2sysv_save (frame
);
8046 save_stub_call_needed
= false;
8051 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
8053 if (flag_stack_usage_info
)
8055 /* We start to count from ARG_POINTER. */
8056 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
8058 /* If it was realigned, take into account the fake frame. */
8059 if (stack_realign_drap
)
8061 if (ix86_static_chain_on_stack
)
8062 stack_size
+= UNITS_PER_WORD
;
8064 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
8065 stack_size
+= UNITS_PER_WORD
;
8067 /* This over-estimates by 1 minimal-stack-alignment-unit but
8068 mitigates that by counting in the new return address slot. */
8069 current_function_dynamic_stack_size
8070 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
8073 current_function_static_stack_size
= stack_size
;
8076 /* On SEH target with very large frame size, allocate an area to save
8077 SSE registers (as the very large allocation won't be described). */
8079 && frame
.stack_pointer_offset
> SEH_MAX_FRAME_SIZE
8080 && !sse_registers_saved
)
8082 HOST_WIDE_INT sse_size
8083 = frame
.sse_reg_save_offset
- frame
.reg_save_offset
;
8085 gcc_assert (int_registers_saved
);
8087 /* No need to do stack checking as the area will be immediately
8089 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8090 GEN_INT (-sse_size
), -1,
8091 m
->fs
.cfa_reg
== stack_pointer_rtx
);
8092 allocate
-= sse_size
;
8093 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
8094 sse_registers_saved
= true;
8097 /* The stack has already been decremented by the instruction calling us
8098 so probe if the size is non-negative to preserve the protection area. */
8100 && (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
8101 || flag_stack_clash_protection
))
8103 if (flag_stack_clash_protection
)
8105 ix86_adjust_stack_and_probe_stack_clash (allocate
,
8106 int_registers_saved
);
8109 else if (STACK_CHECK_MOVING_SP
)
8111 if (!(crtl
->is_leaf
&& !cfun
->calls_alloca
8112 && allocate
<= get_probe_interval ()))
8114 ix86_adjust_stack_and_probe (allocate
, int_registers_saved
);
8120 HOST_WIDE_INT size
= allocate
;
8122 if (TARGET_64BIT
&& size
>= HOST_WIDE_INT_C (0x80000000))
8123 size
= 0x80000000 - get_stack_check_protect () - 1;
8125 if (TARGET_STACK_PROBE
)
8127 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
8129 if (size
> get_probe_interval ())
8130 ix86_emit_probe_stack_range (0, size
, int_registers_saved
);
8133 ix86_emit_probe_stack_range (0,
8134 size
+ get_stack_check_protect (),
8135 int_registers_saved
);
8139 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
8141 if (size
> get_probe_interval ()
8142 && size
> get_stack_check_protect ())
8143 ix86_emit_probe_stack_range (get_stack_check_protect (),
8145 - get_stack_check_protect ()),
8146 int_registers_saved
);
8149 ix86_emit_probe_stack_range (get_stack_check_protect (), size
,
8150 int_registers_saved
);
8157 else if (!ix86_target_stack_probe ()
8158 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
8160 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8161 GEN_INT (-allocate
), -1,
8162 m
->fs
.cfa_reg
== stack_pointer_rtx
);
8166 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
8168 rtx (*adjust_stack_insn
)(rtx
, rtx
, rtx
);
8169 const bool sp_is_cfa_reg
= (m
->fs
.cfa_reg
== stack_pointer_rtx
);
8170 bool eax_live
= ix86_eax_live_at_start_p ();
8171 bool r10_live
= false;
8174 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
8178 insn
= emit_insn (gen_push (eax
));
8179 allocate
-= UNITS_PER_WORD
;
8180 /* Note that SEH directives need to continue tracking the stack
8181 pointer even after the frame pointer has been set up. */
8182 if (sp_is_cfa_reg
|| TARGET_SEH
)
8185 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
8186 RTX_FRAME_RELATED_P (insn
) = 1;
8187 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
8188 gen_rtx_SET (stack_pointer_rtx
,
8189 plus_constant (Pmode
, stack_pointer_rtx
,
8196 r10
= gen_rtx_REG (Pmode
, R10_REG
);
8197 insn
= emit_insn (gen_push (r10
));
8198 allocate
-= UNITS_PER_WORD
;
8199 if (sp_is_cfa_reg
|| TARGET_SEH
)
8202 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
8203 RTX_FRAME_RELATED_P (insn
) = 1;
8204 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
8205 gen_rtx_SET (stack_pointer_rtx
,
8206 plus_constant (Pmode
, stack_pointer_rtx
,
8211 emit_move_insn (eax
, GEN_INT (allocate
));
8212 emit_insn (ix86_gen_allocate_stack_worker (eax
, eax
));
8214 /* Use the fact that AX still contains ALLOCATE. */
8215 adjust_stack_insn
= (Pmode
== DImode
8216 ? gen_pro_epilogue_adjust_stack_di_sub
8217 : gen_pro_epilogue_adjust_stack_si_sub
);
8219 insn
= emit_insn (adjust_stack_insn (stack_pointer_rtx
,
8220 stack_pointer_rtx
, eax
));
8222 if (sp_is_cfa_reg
|| TARGET_SEH
)
8225 m
->fs
.cfa_offset
+= allocate
;
8226 RTX_FRAME_RELATED_P (insn
) = 1;
8227 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
8228 gen_rtx_SET (stack_pointer_rtx
,
8229 plus_constant (Pmode
, stack_pointer_rtx
,
8232 m
->fs
.sp_offset
+= allocate
;
8234 /* Use stack_pointer_rtx for relative addressing so that code works for
8235 realigned stack. But this means that we need a blockage to prevent
8236 stores based on the frame pointer from being scheduled before. */
8237 if (r10_live
&& eax_live
)
8239 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, eax
);
8240 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
8241 gen_frame_mem (word_mode
, t
));
8242 t
= plus_constant (Pmode
, t
, UNITS_PER_WORD
);
8243 emit_move_insn (gen_rtx_REG (word_mode
, AX_REG
),
8244 gen_frame_mem (word_mode
, t
));
8245 emit_insn (gen_memory_blockage ());
8247 else if (eax_live
|| r10_live
)
8249 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, eax
);
8250 emit_move_insn (gen_rtx_REG (word_mode
,
8251 (eax_live
? AX_REG
: R10_REG
)),
8252 gen_frame_mem (word_mode
, t
));
8253 emit_insn (gen_memory_blockage ());
8256 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
8258 /* If we havn't already set up the frame pointer, do so now. */
8259 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
8261 insn
= ix86_gen_add3 (hard_frame_pointer_rtx
, stack_pointer_rtx
,
8262 GEN_INT (frame
.stack_pointer_offset
8263 - frame
.hard_frame_pointer_offset
));
8264 insn
= emit_insn (insn
);
8265 RTX_FRAME_RELATED_P (insn
) = 1;
8266 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
8268 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8269 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
8270 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
8271 m
->fs
.fp_valid
= true;
8274 if (!int_registers_saved
)
8275 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
8276 if (!sse_registers_saved
)
8277 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
8278 else if (save_stub_call_needed
)
8279 ix86_emit_outlined_ms2sysv_save (frame
);
8281 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
8283 if (!TARGET_64BIT
&& pic_offset_table_rtx
&& crtl
->profile
&& !flag_fentry
)
8285 rtx pic
= gen_rtx_REG (Pmode
, REAL_PIC_OFFSET_TABLE_REGNUM
);
8286 insn
= emit_insn (gen_set_got (pic
));
8287 RTX_FRAME_RELATED_P (insn
) = 1;
8288 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
8289 emit_insn (gen_prologue_use (pic
));
8290 /* Deleting already emmitted SET_GOT if exist and allocated to
8291 REAL_PIC_OFFSET_TABLE_REGNUM. */
8292 ix86_elim_entry_set_got (pic
);
8295 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
8297 /* vDRAP is setup but after reload it turns out stack realign
8298 isn't necessary, here we will emit prologue to setup DRAP
8299 without stack realign adjustment */
8300 t
= choose_baseaddr (0, NULL
);
8301 emit_insn (gen_rtx_SET (crtl
->drap_reg
, t
));
8304 /* Prevent instructions from being scheduled into register save push
8305 sequence when access to the redzone area is done through frame pointer.
8306 The offset between the frame pointer and the stack pointer is calculated
8307 relative to the value of the stack pointer at the end of the function
8308 prologue, and moving instructions that access redzone area via frame
8309 pointer inside push sequence violates this assumption. */
8310 if (frame_pointer_needed
&& frame
.red_zone_size
)
8311 emit_insn (gen_memory_blockage ());
8313 /* SEH requires that the prologue end within 256 bytes of the start of
8314 the function. Prevent instruction schedules that would extend that.
8315 Further, prevent alloca modifications to the stack pointer from being
8316 combined with prologue modifications. */
8318 emit_insn (gen_prologue_use (stack_pointer_rtx
));
8321 /* Emit code to restore REG using a POP insn. */
8324 ix86_emit_restore_reg_using_pop (rtx reg
)
8326 struct machine_function
*m
= cfun
->machine
;
8327 rtx_insn
*insn
= emit_insn (gen_pop (reg
));
8329 ix86_add_cfa_restore_note (insn
, reg
, m
->fs
.sp_offset
);
8330 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
8332 if (m
->fs
.cfa_reg
== crtl
->drap_reg
8333 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
8335 /* Previously we'd represented the CFA as an expression
8336 like *(%ebp - 8). We've just popped that value from
8337 the stack, which means we need to reset the CFA to
8338 the drap register. This will remain until we restore
8339 the stack pointer. */
8340 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
8341 RTX_FRAME_RELATED_P (insn
) = 1;
8343 /* This means that the DRAP register is valid for addressing too. */
8344 m
->fs
.drap_valid
= true;
8348 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8350 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
8351 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
8352 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
8353 RTX_FRAME_RELATED_P (insn
) = 1;
8355 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
8358 /* When the frame pointer is the CFA, and we pop it, we are
8359 swapping back to the stack pointer as the CFA. This happens
8360 for stack frames that don't allocate other data, so we assume
8361 the stack pointer is now pointing at the return address, i.e.
8362 the function entry state, which makes the offset be 1 word. */
8363 if (reg
== hard_frame_pointer_rtx
)
8365 m
->fs
.fp_valid
= false;
8366 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
8368 m
->fs
.cfa_reg
= stack_pointer_rtx
;
8369 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
8371 add_reg_note (insn
, REG_CFA_DEF_CFA
,
8372 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
8373 GEN_INT (m
->fs
.cfa_offset
)));
8374 RTX_FRAME_RELATED_P (insn
) = 1;
8379 /* Emit code to restore saved registers using POP insns. */
8382 ix86_emit_restore_regs_using_pop (void)
8386 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8387 if (GENERAL_REGNO_P (regno
) && ix86_save_reg (regno
, false, true))
8388 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode
, regno
));
8391 /* Emit code and notes for the LEAVE instruction. If insn is non-null,
8392 omits the emit and only attaches the notes. */
8395 ix86_emit_leave (rtx_insn
*insn
)
8397 struct machine_function
*m
= cfun
->machine
;
8399 insn
= emit_insn (ix86_gen_leave ());
8401 ix86_add_queued_cfa_restore_notes (insn
);
8403 gcc_assert (m
->fs
.fp_valid
);
8404 m
->fs
.sp_valid
= true;
8405 m
->fs
.sp_realigned
= false;
8406 m
->fs
.sp_offset
= m
->fs
.fp_offset
- UNITS_PER_WORD
;
8407 m
->fs
.fp_valid
= false;
8409 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
8411 m
->fs
.cfa_reg
= stack_pointer_rtx
;
8412 m
->fs
.cfa_offset
= m
->fs
.sp_offset
;
8414 add_reg_note (insn
, REG_CFA_DEF_CFA
,
8415 plus_constant (Pmode
, stack_pointer_rtx
,
8417 RTX_FRAME_RELATED_P (insn
) = 1;
8419 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
,
8423 /* Emit code to restore saved registers using MOV insns.
8424 First register is restored from CFA - CFA_OFFSET. */
8426 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
8427 bool maybe_eh_return
)
8429 struct machine_function
*m
= cfun
->machine
;
8432 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8433 if (GENERAL_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
, true))
8435 rtx reg
= gen_rtx_REG (word_mode
, regno
);
8439 mem
= choose_baseaddr (cfa_offset
, NULL
);
8440 mem
= gen_frame_mem (word_mode
, mem
);
8441 insn
= emit_move_insn (reg
, mem
);
8443 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
8445 /* Previously we'd represented the CFA as an expression
8446 like *(%ebp - 8). We've just popped that value from
8447 the stack, which means we need to reset the CFA to
8448 the drap register. This will remain until we restore
8449 the stack pointer. */
8450 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
8451 RTX_FRAME_RELATED_P (insn
) = 1;
8453 /* This means that the DRAP register is valid for addressing. */
8454 m
->fs
.drap_valid
= true;
8457 ix86_add_cfa_restore_note (NULL
, reg
, cfa_offset
);
8459 cfa_offset
-= UNITS_PER_WORD
;
8463 /* Emit code to restore saved registers using MOV insns.
8464 First register is restored from CFA - CFA_OFFSET. */
8466 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
8467 bool maybe_eh_return
)
8471 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8472 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
, true))
8474 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
8476 unsigned int align
= GET_MODE_ALIGNMENT (V4SFmode
);
8478 mem
= choose_baseaddr (cfa_offset
, &align
);
8479 mem
= gen_rtx_MEM (V4SFmode
, mem
);
8481 /* The location aligment depends upon the base register. */
8482 align
= MIN (GET_MODE_ALIGNMENT (V4SFmode
), align
);
8483 gcc_assert (! (cfa_offset
& (align
/ BITS_PER_UNIT
- 1)));
8484 set_mem_align (mem
, align
);
8485 emit_insn (gen_rtx_SET (reg
, mem
));
8487 ix86_add_cfa_restore_note (NULL
, reg
, cfa_offset
);
8489 cfa_offset
-= GET_MODE_SIZE (V4SFmode
);
8494 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame
&frame
,
8495 bool use_call
, int style
)
8497 struct machine_function
*m
= cfun
->machine
;
8498 const unsigned ncregs
= NUM_X86_64_MS_CLOBBERED_REGS
8499 + m
->call_ms2sysv_extra_regs
;
8501 unsigned int elems_needed
, align
, i
, vi
= 0;
8504 rtx rsi
= gen_rtx_REG (word_mode
, SI_REG
);
8506 const struct xlogue_layout
&xlogue
= xlogue_layout::get_instance ();
8507 HOST_WIDE_INT stub_ptr_offset
= xlogue
.get_stub_ptr_offset ();
8508 HOST_WIDE_INT rsi_offset
= frame
.stack_realign_offset
+ stub_ptr_offset
;
8509 rtx rsi_frame_load
= NULL_RTX
;
8510 HOST_WIDE_INT rsi_restore_offset
= (HOST_WIDE_INT
)-1;
8511 enum xlogue_stub stub
;
8513 gcc_assert (!m
->fs
.fp_valid
|| frame_pointer_needed
);
8515 /* If using a realigned stack, we should never start with padding. */
8516 gcc_assert (!stack_realign_fp
|| !xlogue
.get_stack_align_off_in ());
8518 /* Setup RSI as the stub's base pointer. */
8519 align
= GET_MODE_ALIGNMENT (V4SFmode
);
8520 tmp
= choose_baseaddr (rsi_offset
, &align
, SI_REG
);
8521 gcc_assert (align
>= GET_MODE_ALIGNMENT (V4SFmode
));
8523 emit_insn (gen_rtx_SET (rsi
, tmp
));
8525 /* Get a symbol for the stub. */
8526 if (frame_pointer_needed
)
8527 stub
= use_call
? XLOGUE_STUB_RESTORE_HFP
8528 : XLOGUE_STUB_RESTORE_HFP_TAIL
;
8530 stub
= use_call
? XLOGUE_STUB_RESTORE
8531 : XLOGUE_STUB_RESTORE_TAIL
;
8532 sym
= xlogue
.get_stub_rtx (stub
);
8534 elems_needed
= ncregs
;
8538 elems_needed
+= frame_pointer_needed
? 5 : 3;
8539 v
= rtvec_alloc (elems_needed
);
8541 /* We call the epilogue stub when we need to pop incoming args or we are
8542 doing a sibling call as the tail. Otherwise, we will emit a jmp to the
8543 epilogue stub and it is the tail-call. */
8545 RTVEC_ELT (v
, vi
++) = gen_rtx_USE (VOIDmode
, sym
);
8548 RTVEC_ELT (v
, vi
++) = ret_rtx
;
8549 RTVEC_ELT (v
, vi
++) = gen_rtx_USE (VOIDmode
, sym
);
8550 if (frame_pointer_needed
)
8552 rtx rbp
= gen_rtx_REG (DImode
, BP_REG
);
8553 gcc_assert (m
->fs
.fp_valid
);
8554 gcc_assert (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
);
8556 tmp
= gen_rtx_PLUS (DImode
, rbp
, GEN_INT (8));
8557 RTVEC_ELT (v
, vi
++) = gen_rtx_SET (stack_pointer_rtx
, tmp
);
8558 RTVEC_ELT (v
, vi
++) = gen_rtx_SET (rbp
, gen_rtx_MEM (DImode
, rbp
));
8559 tmp
= gen_rtx_MEM (BLKmode
, gen_rtx_SCRATCH (VOIDmode
));
8560 RTVEC_ELT (v
, vi
++) = gen_rtx_CLOBBER (VOIDmode
, tmp
);
8564 /* If no hard frame pointer, we set R10 to the SP restore value. */
8565 gcc_assert (!m
->fs
.fp_valid
);
8566 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
8567 gcc_assert (m
->fs
.sp_valid
);
8569 r10
= gen_rtx_REG (DImode
, R10_REG
);
8570 tmp
= gen_rtx_PLUS (Pmode
, rsi
, GEN_INT (stub_ptr_offset
));
8571 emit_insn (gen_rtx_SET (r10
, tmp
));
8573 RTVEC_ELT (v
, vi
++) = gen_rtx_SET (stack_pointer_rtx
, r10
);
8577 /* Generate frame load insns and restore notes. */
8578 for (i
= 0; i
< ncregs
; ++i
)
8580 const xlogue_layout::reginfo
&r
= xlogue
.get_reginfo (i
);
8581 machine_mode mode
= SSE_REGNO_P (r
.regno
) ? V4SFmode
: word_mode
;
8582 rtx reg
, frame_load
;
8584 reg
= gen_rtx_REG (mode
, r
.regno
);
8585 frame_load
= gen_frame_load (reg
, rsi
, r
.offset
);
8587 /* Save RSI frame load insn & note to add last. */
8588 if (r
.regno
== SI_REG
)
8590 gcc_assert (!rsi_frame_load
);
8591 rsi_frame_load
= frame_load
;
8592 rsi_restore_offset
= r
.offset
;
8596 RTVEC_ELT (v
, vi
++) = frame_load
;
8597 ix86_add_cfa_restore_note (NULL
, reg
, r
.offset
);
8601 /* Add RSI frame load & restore note at the end. */
8602 gcc_assert (rsi_frame_load
);
8603 gcc_assert (rsi_restore_offset
!= (HOST_WIDE_INT
)-1);
8604 RTVEC_ELT (v
, vi
++) = rsi_frame_load
;
8605 ix86_add_cfa_restore_note (NULL
, gen_rtx_REG (DImode
, SI_REG
),
8606 rsi_restore_offset
);
8608 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
8609 if (!use_call
&& !frame_pointer_needed
)
8611 gcc_assert (m
->fs
.sp_valid
);
8612 gcc_assert (!m
->fs
.sp_realigned
);
8614 /* At this point, R10 should point to frame.stack_realign_offset. */
8615 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8616 m
->fs
.cfa_offset
+= m
->fs
.sp_offset
- frame
.stack_realign_offset
;
8617 m
->fs
.sp_offset
= frame
.stack_realign_offset
;
8620 gcc_assert (vi
== (unsigned int)GET_NUM_ELEM (v
));
8621 tmp
= gen_rtx_PARALLEL (VOIDmode
, v
);
8623 insn
= emit_insn (tmp
);
8626 insn
= emit_jump_insn (tmp
);
8627 JUMP_LABEL (insn
) = ret_rtx
;
8629 if (frame_pointer_needed
)
8630 ix86_emit_leave (insn
);
8633 /* Need CFA adjust note. */
8634 tmp
= gen_rtx_SET (stack_pointer_rtx
, r10
);
8635 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, tmp
);
8639 RTX_FRAME_RELATED_P (insn
) = true;
8640 ix86_add_queued_cfa_restore_notes (insn
);
8642 /* If we're not doing a tail-call, we need to adjust the stack. */
8643 if (use_call
&& m
->fs
.sp_valid
)
8645 HOST_WIDE_INT dealloc
= m
->fs
.sp_offset
- frame
.stack_realign_offset
;
8646 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8647 GEN_INT (dealloc
), style
,
8648 m
->fs
.cfa_reg
== stack_pointer_rtx
);
8652 /* Restore function stack, frame, and registers. */
8655 ix86_expand_epilogue (int style
)
8657 struct machine_function
*m
= cfun
->machine
;
8658 struct machine_frame_state frame_state_save
= m
->fs
;
8659 bool restore_regs_via_mov
;
8661 bool restore_stub_is_tail
= false;
8663 if (ix86_function_naked (current_function_decl
))
8665 /* The program should not reach this point. */
8666 emit_insn (gen_ud2 ());
8670 ix86_finalize_stack_frame_flags ();
8671 const struct ix86_frame
&frame
= cfun
->machine
->frame
;
8673 m
->fs
.sp_realigned
= stack_realign_fp
;
8674 m
->fs
.sp_valid
= stack_realign_fp
8675 || !frame_pointer_needed
8676 || crtl
->sp_is_unchanging
;
8677 gcc_assert (!m
->fs
.sp_valid
8678 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
8680 /* The FP must be valid if the frame pointer is present. */
8681 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
8682 gcc_assert (!m
->fs
.fp_valid
8683 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
8685 /* We must have *some* valid pointer to the stack frame. */
8686 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
8688 /* The DRAP is never valid at this point. */
8689 gcc_assert (!m
->fs
.drap_valid
);
8691 /* See the comment about red zone and frame
8692 pointer usage in ix86_expand_prologue. */
8693 if (frame_pointer_needed
&& frame
.red_zone_size
)
8694 emit_insn (gen_memory_blockage ());
8696 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
8697 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
8699 /* Determine the CFA offset of the end of the red-zone. */
8700 m
->fs
.red_zone_offset
= 0;
8701 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
8703 /* The red-zone begins below return address and error code in
8704 exception handler. */
8705 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ INCOMING_FRAME_SP_OFFSET
;
8707 /* When the register save area is in the aligned portion of
8708 the stack, determine the maximum runtime displacement that
8709 matches up with the aligned frame. */
8710 if (stack_realign_drap
)
8711 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
8715 HOST_WIDE_INT reg_save_offset
= frame
.reg_save_offset
;
8717 /* Special care must be taken for the normal return case of a function
8718 using eh_return: the eax and edx registers are marked as saved, but
8719 not restored along this path. Adjust the save location to match. */
8720 if (crtl
->calls_eh_return
&& style
!= 2)
8721 reg_save_offset
-= 2 * UNITS_PER_WORD
;
8723 /* EH_RETURN requires the use of moves to function properly. */
8724 if (crtl
->calls_eh_return
)
8725 restore_regs_via_mov
= true;
8726 /* SEH requires the use of pops to identify the epilogue. */
8727 else if (TARGET_SEH
)
8728 restore_regs_via_mov
= false;
8729 /* If we're only restoring one register and sp cannot be used then
8730 using a move instruction to restore the register since it's
8731 less work than reloading sp and popping the register. */
8732 else if (!sp_valid_at (frame
.hfp_save_offset
) && frame
.nregs
<= 1)
8733 restore_regs_via_mov
= true;
8734 else if (TARGET_EPILOGUE_USING_MOVE
8735 && cfun
->machine
->use_fast_prologue_epilogue
8737 || m
->fs
.sp_offset
!= reg_save_offset
))
8738 restore_regs_via_mov
= true;
8739 else if (frame_pointer_needed
8741 && m
->fs
.sp_offset
!= reg_save_offset
)
8742 restore_regs_via_mov
= true;
8743 else if (frame_pointer_needed
8745 && cfun
->machine
->use_fast_prologue_epilogue
8746 && frame
.nregs
== 1)
8747 restore_regs_via_mov
= true;
8749 restore_regs_via_mov
= false;
8751 if (restore_regs_via_mov
|| frame
.nsseregs
)
8753 /* Ensure that the entire register save area is addressable via
8754 the stack pointer, if we will restore SSE regs via sp. */
8756 && m
->fs
.sp_offset
> 0x7fffffff
8757 && sp_valid_at (frame
.stack_realign_offset
+ 1)
8758 && (frame
.nsseregs
+ frame
.nregs
) != 0)
8760 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8761 GEN_INT (m
->fs
.sp_offset
8762 - frame
.sse_reg_save_offset
),
8764 m
->fs
.cfa_reg
== stack_pointer_rtx
);
8768 /* If there are any SSE registers to restore, then we have to do it
8769 via moves, since there's obviously no pop for SSE regs. */
8771 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
8774 if (m
->call_ms2sysv
)
8776 int pop_incoming_args
= crtl
->args
.pops_args
&& crtl
->args
.size
;
8778 /* We cannot use a tail-call for the stub if:
8779 1. We have to pop incoming args,
8780 2. We have additional int regs to restore, or
8781 3. A sibling call will be the tail-call, or
8782 4. We are emitting an eh_return_internal epilogue.
8784 TODO: Item 4 has not yet tested!
8786 If any of the above are true, we will call the stub rather than
8788 restore_stub_is_tail
= !(pop_incoming_args
|| frame
.nregs
|| style
!= 1);
8789 ix86_emit_outlined_ms2sysv_restore (frame
, !restore_stub_is_tail
, style
);
8792 /* If using out-of-line stub that is a tail-call, then...*/
8793 if (m
->call_ms2sysv
&& restore_stub_is_tail
)
8795 /* TODO: parinoid tests. (remove eventually) */
8796 gcc_assert (m
->fs
.sp_valid
);
8797 gcc_assert (!m
->fs
.sp_realigned
);
8798 gcc_assert (!m
->fs
.fp_valid
);
8799 gcc_assert (!m
->fs
.realigned
);
8800 gcc_assert (m
->fs
.sp_offset
== UNITS_PER_WORD
);
8801 gcc_assert (!crtl
->drap_reg
);
8802 gcc_assert (!frame
.nregs
);
8804 else if (restore_regs_via_mov
)
8809 ix86_emit_restore_regs_using_mov (reg_save_offset
, style
== 2);
8811 /* eh_return epilogues need %ecx added to the stack pointer. */
8814 rtx sa
= EH_RETURN_STACKADJ_RTX
;
8817 /* %ecx can't be used for both DRAP register and eh_return. */
8819 gcc_assert (REGNO (crtl
->drap_reg
) != CX_REG
);
8821 /* regparm nested functions don't work with eh_return. */
8822 gcc_assert (!ix86_static_chain_on_stack
);
8824 if (frame_pointer_needed
)
8826 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
8827 t
= plus_constant (Pmode
, t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
8828 emit_insn (gen_rtx_SET (sa
, t
));
8830 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
8831 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
8833 /* Note that we use SA as a temporary CFA, as the return
8834 address is at the proper place relative to it. We
8835 pretend this happens at the FP restore insn because
8836 prior to this insn the FP would be stored at the wrong
8837 offset relative to SA, and after this insn we have no
8838 other reasonable register to use for the CFA. We don't
8839 bother resetting the CFA to the SP for the duration of
8840 the return insn, unless the control flow instrumentation
8841 is done. In this case the SP is used later and we have
8842 to reset CFA to SP. */
8843 add_reg_note (insn
, REG_CFA_DEF_CFA
,
8844 plus_constant (Pmode
, sa
, UNITS_PER_WORD
));
8845 ix86_add_queued_cfa_restore_notes (insn
);
8846 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
8847 RTX_FRAME_RELATED_P (insn
) = 1;
8850 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
8851 m
->fs
.fp_valid
= false;
8853 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
8855 flag_cf_protection
);
8859 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
8860 t
= plus_constant (Pmode
, t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
8861 insn
= emit_insn (gen_rtx_SET (stack_pointer_rtx
, t
));
8862 ix86_add_queued_cfa_restore_notes (insn
);
8864 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
8865 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
8867 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
8868 add_reg_note (insn
, REG_CFA_DEF_CFA
,
8869 plus_constant (Pmode
, stack_pointer_rtx
,
8871 RTX_FRAME_RELATED_P (insn
) = 1;
8874 m
->fs
.sp_offset
= UNITS_PER_WORD
;
8875 m
->fs
.sp_valid
= true;
8876 m
->fs
.sp_realigned
= false;
8881 /* SEH requires that the function end with (1) a stack adjustment
8882 if necessary, (2) a sequence of pops, and (3) a return or
8883 jump instruction. Prevent insns from the function body from
8884 being scheduled into this sequence. */
8887 /* Prevent a catch region from being adjacent to the standard
8888 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
8889 nor several other flags that would be interesting to test are
8891 if (flag_non_call_exceptions
)
8892 emit_insn (gen_nops (const1_rtx
));
8894 emit_insn (gen_blockage ());
8897 /* First step is to deallocate the stack frame so that we can
8898 pop the registers. If the stack pointer was realigned, it needs
8899 to be restored now. Also do it on SEH target for very large
8900 frame as the emitted instructions aren't allowed by the ABI
8902 if (!m
->fs
.sp_valid
|| m
->fs
.sp_realigned
8904 && (m
->fs
.sp_offset
- reg_save_offset
8905 >= SEH_MAX_FRAME_SIZE
)))
8907 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
8908 GEN_INT (m
->fs
.fp_offset
8912 else if (m
->fs
.sp_offset
!= reg_save_offset
)
8914 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8915 GEN_INT (m
->fs
.sp_offset
8918 m
->fs
.cfa_reg
== stack_pointer_rtx
);
8921 ix86_emit_restore_regs_using_pop ();
8924 /* If we used a stack pointer and haven't already got rid of it,
8928 /* If the stack pointer is valid and pointing at the frame
8929 pointer store address, then we only need a pop. */
8930 if (sp_valid_at (frame
.hfp_save_offset
)
8931 && m
->fs
.sp_offset
== frame
.hfp_save_offset
)
8932 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
8933 /* Leave results in shorter dependency chains on CPUs that are
8934 able to grok it fast. */
8935 else if (TARGET_USE_LEAVE
8936 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun
))
8937 || !cfun
->machine
->use_fast_prologue_epilogue
)
8938 ix86_emit_leave (NULL
);
8941 pro_epilogue_adjust_stack (stack_pointer_rtx
,
8942 hard_frame_pointer_rtx
,
8943 const0_rtx
, style
, !using_drap
);
8944 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
8950 int param_ptr_offset
= UNITS_PER_WORD
;
8953 gcc_assert (stack_realign_drap
);
8955 if (ix86_static_chain_on_stack
)
8956 param_ptr_offset
+= UNITS_PER_WORD
;
8957 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
8958 param_ptr_offset
+= UNITS_PER_WORD
;
8960 insn
= emit_insn (gen_rtx_SET
8962 gen_rtx_PLUS (Pmode
,
8964 GEN_INT (-param_ptr_offset
))));
8965 m
->fs
.cfa_reg
= stack_pointer_rtx
;
8966 m
->fs
.cfa_offset
= param_ptr_offset
;
8967 m
->fs
.sp_offset
= param_ptr_offset
;
8968 m
->fs
.realigned
= false;
8970 add_reg_note (insn
, REG_CFA_DEF_CFA
,
8971 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
8972 GEN_INT (param_ptr_offset
)));
8973 RTX_FRAME_RELATED_P (insn
) = 1;
8975 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
8976 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
8979 /* At this point the stack pointer must be valid, and we must have
8980 restored all of the registers. We may not have deallocated the
8981 entire stack frame. We've delayed this until now because it may
8982 be possible to merge the local stack deallocation with the
8983 deallocation forced by ix86_static_chain_on_stack. */
8984 gcc_assert (m
->fs
.sp_valid
);
8985 gcc_assert (!m
->fs
.sp_realigned
);
8986 gcc_assert (!m
->fs
.fp_valid
);
8987 gcc_assert (!m
->fs
.realigned
);
8988 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
8990 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8991 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
8995 ix86_add_queued_cfa_restore_notes (get_last_insn ());
8997 /* Sibcall epilogues don't want a return instruction. */
9000 m
->fs
= frame_state_save
;
9004 if (cfun
->machine
->func_type
!= TYPE_NORMAL
)
9005 emit_jump_insn (gen_interrupt_return ());
9006 else if (crtl
->args
.pops_args
&& crtl
->args
.size
)
9008 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
9010 /* i386 can only pop 64K bytes. If asked to pop more, pop return
9011 address, do explicit add, and jump indirectly to the caller. */
9013 if (crtl
->args
.pops_args
>= 65536)
9015 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
9018 /* There is no "pascal" calling convention in any 64bit ABI. */
9019 gcc_assert (!TARGET_64BIT
);
9021 insn
= emit_insn (gen_pop (ecx
));
9022 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
9023 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
9025 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
9026 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
9027 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
9028 add_reg_note (insn
, REG_CFA_REGISTER
, gen_rtx_SET (ecx
, pc_rtx
));
9029 RTX_FRAME_RELATED_P (insn
) = 1;
9031 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
9033 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
9036 emit_jump_insn (gen_simple_return_pop_internal (popc
));
9038 else if (!m
->call_ms2sysv
|| !restore_stub_is_tail
)
9040 /* In case of return from EH a simple return cannot be used
9041 as a return address will be compared with a shadow stack
9042 return address. Use indirect jump instead. */
9043 if (style
== 2 && flag_cf_protection
)
9045 /* Register used in indirect jump must be in word_mode. But
9046 Pmode may not be the same as word_mode for x32. */
9047 rtx ecx
= gen_rtx_REG (word_mode
, CX_REG
);
9050 insn
= emit_insn (gen_pop (ecx
));
9051 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
9052 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
9054 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
9055 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
9056 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
9057 add_reg_note (insn
, REG_CFA_REGISTER
, gen_rtx_SET (ecx
, pc_rtx
));
9058 RTX_FRAME_RELATED_P (insn
) = 1;
9060 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
9063 emit_jump_insn (gen_simple_return_internal ());
9066 /* Restore the state back to the state from the prologue,
9067 so that it's correct for the next epilogue. */
9068 m
->fs
= frame_state_save
;
9071 /* Reset from the function's potential modifications. */
9074 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
)
9076 if (pic_offset_table_rtx
9077 && !ix86_use_pseudo_pic_reg ())
9078 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
9082 rtx_insn
*insn
= get_last_insn ();
9083 rtx_insn
*deleted_debug_label
= NULL
;
9085 /* Mach-O doesn't support labels at the end of objects, so if
9086 it looks like we might want one, take special action.
9087 First, collect any sequence of deleted debug labels. */
9090 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
9092 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
9093 notes only, instead set their CODE_LABEL_NUMBER to -1,
9094 otherwise there would be code generation differences
9095 in between -g and -g0. */
9096 if (NOTE_P (insn
) && NOTE_KIND (insn
)
9097 == NOTE_INSN_DELETED_DEBUG_LABEL
)
9098 deleted_debug_label
= insn
;
9099 insn
= PREV_INSN (insn
);
9105 then this needs to be detected, so skip past the barrier. */
9107 if (insn
&& BARRIER_P (insn
))
9108 insn
= PREV_INSN (insn
);
9110 /* Up to now we've only seen notes or barriers. */
9115 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
))
9116 /* Trailing label. */
9117 fputs ("\tnop\n", file
);
9118 else if (cfun
&& ! cfun
->is_thunk
)
9120 /* See if we have a completely empty function body, skipping
9121 the special case of the picbase thunk emitted as asm. */
9122 while (insn
&& ! INSN_P (insn
))
9123 insn
= PREV_INSN (insn
);
9124 /* If we don't find any insns, we've got an empty function body;
9125 I.e. completely empty - without a return or branch. This is
9126 taken as the case where a function body has been removed
9127 because it contains an inline __builtin_unreachable(). GCC
9128 declares that reaching __builtin_unreachable() means UB so
9129 we're not obliged to do anything special; however, we want
9130 non-zero-sized function bodies. To meet this, and help the
9131 user out, let's trap the case. */
9133 fputs ("\tud2\n", file
);
9136 else if (deleted_debug_label
)
9137 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
9138 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
9139 CODE_LABEL_NUMBER (insn
) = -1;
9143 /* Return a scratch register to use in the split stack prologue. The
9144 split stack prologue is used for -fsplit-stack. It is the first
9145 instructions in the function, even before the regular prologue.
9146 The scratch register can be any caller-saved register which is not
9147 used for parameters or for the static chain. */
9150 split_stack_prologue_scratch_regno (void)
9156 bool is_fastcall
, is_thiscall
;
9159 is_fastcall
= (lookup_attribute ("fastcall",
9160 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
9162 is_thiscall
= (lookup_attribute ("thiscall",
9163 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
9165 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
9169 if (DECL_STATIC_CHAIN (cfun
->decl
))
9171 sorry ("%<-fsplit-stack%> does not support fastcall with "
9173 return INVALID_REGNUM
;
9177 else if (is_thiscall
)
9179 if (!DECL_STATIC_CHAIN (cfun
->decl
))
9183 else if (regparm
< 3)
9185 if (!DECL_STATIC_CHAIN (cfun
->decl
))
9191 sorry ("%<-fsplit-stack%> does not support 2 register "
9192 "parameters for a nested function");
9193 return INVALID_REGNUM
;
9200 /* FIXME: We could make this work by pushing a register
9201 around the addition and comparison. */
9202 sorry ("%<-fsplit-stack%> does not support 3 register parameters");
9203 return INVALID_REGNUM
;
9208 /* A SYMBOL_REF for the function which allocates new stackspace for
9211 static GTY(()) rtx split_stack_fn
;
9213 /* A SYMBOL_REF for the more stack function when using the large
9216 static GTY(()) rtx split_stack_fn_large
;
9218 /* Return location of the stack guard value in the TLS block. */
9221 ix86_split_stack_guard (void)
9224 addr_space_t as
= DEFAULT_TLS_SEG_REG
;
9227 gcc_assert (flag_split_stack
);
9229 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
9230 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
9235 r
= GEN_INT (offset
);
9236 r
= gen_const_mem (Pmode
, r
);
9237 set_mem_addr_space (r
, as
);
9242 /* Handle -fsplit-stack. These are the first instructions in the
9243 function, even before the regular prologue. */
9246 ix86_expand_split_stack_prologue (void)
9248 HOST_WIDE_INT allocate
;
9249 unsigned HOST_WIDE_INT args_size
;
9250 rtx_code_label
*label
;
9251 rtx limit
, current
, allocate_rtx
, call_fusage
;
9252 rtx_insn
*call_insn
;
9253 rtx scratch_reg
= NULL_RTX
;
9254 rtx_code_label
*varargs_label
= NULL
;
9257 gcc_assert (flag_split_stack
&& reload_completed
);
9259 ix86_finalize_stack_frame_flags ();
9260 struct ix86_frame
&frame
= cfun
->machine
->frame
;
9261 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
9263 /* This is the label we will branch to if we have enough stack
9264 space. We expect the basic block reordering pass to reverse this
9265 branch if optimizing, so that we branch in the unlikely case. */
9266 label
= gen_label_rtx ();
9268 /* We need to compare the stack pointer minus the frame size with
9269 the stack boundary in the TCB. The stack boundary always gives
9270 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
9271 can compare directly. Otherwise we need to do an addition. */
9273 limit
= ix86_split_stack_guard ();
9275 if (allocate
< SPLIT_STACK_AVAILABLE
)
9276 current
= stack_pointer_rtx
;
9279 unsigned int scratch_regno
;
9282 /* We need a scratch register to hold the stack pointer minus
9283 the required frame size. Since this is the very start of the
9284 function, the scratch register can be any caller-saved
9285 register which is not used for parameters. */
9286 offset
= GEN_INT (- allocate
);
9287 scratch_regno
= split_stack_prologue_scratch_regno ();
9288 if (scratch_regno
== INVALID_REGNUM
)
9290 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
9291 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
9293 /* We don't use ix86_gen_add3 in this case because it will
9294 want to split to lea, but when not optimizing the insn
9295 will not be split after this point. */
9296 emit_insn (gen_rtx_SET (scratch_reg
,
9297 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
9302 emit_move_insn (scratch_reg
, offset
);
9303 emit_insn (ix86_gen_add3 (scratch_reg
, scratch_reg
,
9304 stack_pointer_rtx
));
9306 current
= scratch_reg
;
9309 ix86_expand_branch (GEU
, current
, limit
, label
);
9310 rtx_insn
*jump_insn
= get_last_insn ();
9311 JUMP_LABEL (jump_insn
) = label
;
9313 /* Mark the jump as very likely to be taken. */
9314 add_reg_br_prob_note (jump_insn
, profile_probability::very_likely ());
9316 if (split_stack_fn
== NULL_RTX
)
9318 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
9319 SYMBOL_REF_FLAGS (split_stack_fn
) |= SYMBOL_FLAG_LOCAL
;
9321 fn
= split_stack_fn
;
9323 /* Get more stack space. We pass in the desired stack space and the
9324 size of the arguments to copy to the new stack. In 32-bit mode
9325 we push the parameters; __morestack will return on a new stack
9326 anyhow. In 64-bit mode we pass the parameters in r10 and
9328 allocate_rtx
= GEN_INT (allocate
);
9329 args_size
= crtl
->args
.size
>= 0 ? (HOST_WIDE_INT
) crtl
->args
.size
: 0;
9330 call_fusage
= NULL_RTX
;
9336 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
9337 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
9339 /* If this function uses a static chain, it will be in %r10.
9340 Preserve it across the call to __morestack. */
9341 if (DECL_STATIC_CHAIN (cfun
->decl
))
9345 rax
= gen_rtx_REG (word_mode
, AX_REG
);
9346 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
9347 use_reg (&call_fusage
, rax
);
9350 if ((ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
9353 HOST_WIDE_INT argval
;
9355 gcc_assert (Pmode
== DImode
);
9356 /* When using the large model we need to load the address
9357 into a register, and we've run out of registers. So we
9358 switch to a different calling convention, and we call a
9359 different function: __morestack_large. We pass the
9360 argument size in the upper 32 bits of r10 and pass the
9361 frame size in the lower 32 bits. */
9362 gcc_assert ((allocate
& HOST_WIDE_INT_C (0xffffffff)) == allocate
);
9363 gcc_assert ((args_size
& 0xffffffff) == args_size
);
9365 if (split_stack_fn_large
== NULL_RTX
)
9367 split_stack_fn_large
9368 = gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
9369 SYMBOL_REF_FLAGS (split_stack_fn_large
) |= SYMBOL_FLAG_LOCAL
;
9371 if (ix86_cmodel
== CM_LARGE_PIC
)
9373 rtx_code_label
*label
;
9376 label
= gen_label_rtx ();
9378 LABEL_PRESERVE_P (label
) = 1;
9379 emit_insn (gen_set_rip_rex64 (reg10
, label
));
9380 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
9381 emit_insn (ix86_gen_add3 (reg10
, reg10
, reg11
));
9382 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
9384 x
= gen_rtx_CONST (Pmode
, x
);
9385 emit_move_insn (reg11
, x
);
9386 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
9387 x
= gen_const_mem (Pmode
, x
);
9388 emit_move_insn (reg11
, x
);
9391 emit_move_insn (reg11
, split_stack_fn_large
);
9395 argval
= ((args_size
<< 16) << 16) + allocate
;
9396 emit_move_insn (reg10
, GEN_INT (argval
));
9400 emit_move_insn (reg10
, allocate_rtx
);
9401 emit_move_insn (reg11
, GEN_INT (args_size
));
9402 use_reg (&call_fusage
, reg11
);
9405 use_reg (&call_fusage
, reg10
);
9409 rtx_insn
*insn
= emit_insn (gen_push (GEN_INT (args_size
)));
9410 add_reg_note (insn
, REG_ARGS_SIZE
, GEN_INT (UNITS_PER_WORD
));
9411 insn
= emit_insn (gen_push (allocate_rtx
));
9412 add_reg_note (insn
, REG_ARGS_SIZE
, GEN_INT (2 * UNITS_PER_WORD
));
9413 pop
= GEN_INT (2 * UNITS_PER_WORD
);
9415 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
9416 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
9418 add_function_usage_to (call_insn
, call_fusage
);
9420 add_reg_note (call_insn
, REG_ARGS_SIZE
, GEN_INT (0));
9421 /* Indicate that this function can't jump to non-local gotos. */
9422 make_reg_eh_region_note_nothrow_nononlocal (call_insn
);
9424 /* In order to make call/return prediction work right, we now need
9425 to execute a return instruction. See
9426 libgcc/config/i386/morestack.S for the details on how this works.
9428 For flow purposes gcc must not see this as a return
9429 instruction--we need control flow to continue at the subsequent
9430 label. Therefore, we use an unspec. */
9431 gcc_assert (crtl
->args
.pops_args
< 65536);
9433 = emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
9435 if ((flag_cf_protection
& CF_BRANCH
))
9437 /* Insert ENDBR since __morestack will jump back here via indirect
9439 rtx cet_eb
= gen_nop_endbr ();
9440 emit_insn_after (cet_eb
, ret_insn
);
9443 /* If we are in 64-bit mode and this function uses a static chain,
9444 we saved %r10 in %rax before calling _morestack. */
9445 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
9446 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
9447 gen_rtx_REG (word_mode
, AX_REG
));
9449 /* If this function calls va_start, we need to store a pointer to
9450 the arguments on the old stack, because they may not have been
9451 all copied to the new stack. At this point the old stack can be
9452 found at the frame pointer value used by __morestack, because
9453 __morestack has set that up before calling back to us. Here we
9454 store that pointer in a scratch register, and in
9455 ix86_expand_prologue we store the scratch register in a stack
9457 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
9459 unsigned int scratch_regno
;
9463 scratch_regno
= split_stack_prologue_scratch_regno ();
9464 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
9465 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
9469 return address within this function
9470 return address of caller of this function
9472 So we add three words to get to the stack arguments.
9476 return address within this function
9477 first argument to __morestack
9478 second argument to __morestack
9479 return address of caller of this function
9481 So we add five words to get to the stack arguments.
9483 words
= TARGET_64BIT
? 3 : 5;
9484 emit_insn (gen_rtx_SET (scratch_reg
,
9485 gen_rtx_PLUS (Pmode
, frame_reg
,
9486 GEN_INT (words
* UNITS_PER_WORD
))));
9488 varargs_label
= gen_label_rtx ();
9489 emit_jump_insn (gen_jump (varargs_label
));
9490 JUMP_LABEL (get_last_insn ()) = varargs_label
;
9496 LABEL_NUSES (label
) = 1;
9498 /* If this function calls va_start, we now have to set the scratch
9499 register for the case where we do not call __morestack. In this
9500 case we need to set it based on the stack pointer. */
9501 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
9503 emit_insn (gen_rtx_SET (scratch_reg
,
9504 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
9505 GEN_INT (UNITS_PER_WORD
))));
9507 emit_label (varargs_label
);
9508 LABEL_NUSES (varargs_label
) = 1;
9512 /* We may have to tell the dataflow pass that the split stack prologue
9513 is initializing a scratch register. */
9516 ix86_live_on_entry (bitmap regs
)
9518 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
9520 gcc_assert (flag_split_stack
);
9521 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
9525 /* Extract the parts of an RTL expression that is a valid memory address
9526 for an instruction. Return 0 if the structure of the address is
9527 grossly off. Return -1 if the address contains ASHIFT, so it is not
9528 strictly valid, but still used for computing length of lea instruction. */
9531 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
9533 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
9534 rtx base_reg
, index_reg
;
9535 HOST_WIDE_INT scale
= 1;
9536 rtx scale_rtx
= NULL_RTX
;
9539 addr_space_t seg
= ADDR_SPACE_GENERIC
;
9541 /* Allow zero-extended SImode addresses,
9542 they will be emitted with addr32 prefix. */
9543 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
9545 if (GET_CODE (addr
) == ZERO_EXTEND
9546 && GET_MODE (XEXP (addr
, 0)) == SImode
)
9548 addr
= XEXP (addr
, 0);
9549 if (CONST_INT_P (addr
))
9552 else if (GET_CODE (addr
) == AND
9553 && const_32bit_mask (XEXP (addr
, 1), DImode
))
9555 addr
= lowpart_subreg (SImode
, XEXP (addr
, 0), DImode
);
9556 if (addr
== NULL_RTX
)
9559 if (CONST_INT_P (addr
))
9564 /* Allow SImode subregs of DImode addresses,
9565 they will be emitted with addr32 prefix. */
9566 if (TARGET_64BIT
&& GET_MODE (addr
) == SImode
)
9569 && GET_MODE (SUBREG_REG (addr
)) == DImode
)
9571 addr
= SUBREG_REG (addr
);
9572 if (CONST_INT_P (addr
))
9579 else if (SUBREG_P (addr
))
9581 if (REG_P (SUBREG_REG (addr
)))
9586 else if (GET_CODE (addr
) == PLUS
)
9596 addends
[n
++] = XEXP (op
, 1);
9599 while (GET_CODE (op
) == PLUS
);
9604 for (i
= n
; i
>= 0; --i
)
9607 switch (GET_CODE (op
))
9612 index
= XEXP (op
, 0);
9613 scale_rtx
= XEXP (op
, 1);
9619 index
= XEXP (op
, 0);
9621 if (!CONST_INT_P (tmp
))
9623 scale
= INTVAL (tmp
);
9624 if ((unsigned HOST_WIDE_INT
) scale
> 3)
9631 if (GET_CODE (op
) != UNSPEC
)
9636 if (XINT (op
, 1) == UNSPEC_TP
9637 && TARGET_TLS_DIRECT_SEG_REFS
9638 && seg
== ADDR_SPACE_GENERIC
)
9639 seg
= DEFAULT_TLS_SEG_REG
;
9645 if (!REG_P (SUBREG_REG (op
)))
9672 else if (GET_CODE (addr
) == MULT
)
9674 index
= XEXP (addr
, 0); /* index*scale */
9675 scale_rtx
= XEXP (addr
, 1);
9677 else if (GET_CODE (addr
) == ASHIFT
)
9679 /* We're called for lea too, which implements ashift on occasion. */
9680 index
= XEXP (addr
, 0);
9681 tmp
= XEXP (addr
, 1);
9682 if (!CONST_INT_P (tmp
))
9684 scale
= INTVAL (tmp
);
9685 if ((unsigned HOST_WIDE_INT
) scale
> 3)
9691 disp
= addr
; /* displacement */
9697 else if (SUBREG_P (index
)
9698 && REG_P (SUBREG_REG (index
)))
9704 /* Extract the integral value of scale. */
9707 if (!CONST_INT_P (scale_rtx
))
9709 scale
= INTVAL (scale_rtx
);
9712 base_reg
= base
&& SUBREG_P (base
) ? SUBREG_REG (base
) : base
;
9713 index_reg
= index
&& SUBREG_P (index
) ? SUBREG_REG (index
) : index
;
9715 /* Avoid useless 0 displacement. */
9716 if (disp
== const0_rtx
&& (base
|| index
))
9719 /* Allow arg pointer and stack pointer as index if there is not scaling. */
9720 if (base_reg
&& index_reg
&& scale
== 1
9721 && (REGNO (index_reg
) == ARG_POINTER_REGNUM
9722 || REGNO (index_reg
) == FRAME_POINTER_REGNUM
9723 || REGNO (index_reg
) == SP_REG
))
9725 std::swap (base
, index
);
9726 std::swap (base_reg
, index_reg
);
9729 /* Special case: %ebp cannot be encoded as a base without a displacement.
9731 if (!disp
&& base_reg
9732 && (REGNO (base_reg
) == ARG_POINTER_REGNUM
9733 || REGNO (base_reg
) == FRAME_POINTER_REGNUM
9734 || REGNO (base_reg
) == BP_REG
9735 || REGNO (base_reg
) == R13_REG
))
9738 /* Special case: on K6, [%esi] makes the instruction vector decoded.
9739 Avoid this by transforming to [%esi+0].
9740 Reload calls address legitimization without cfun defined, so we need
9741 to test cfun for being non-NULL. */
9742 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
9743 && base_reg
&& !index_reg
&& !disp
9744 && REGNO (base_reg
) == SI_REG
)
9747 /* Special case: encode reg+reg instead of reg*2. */
9748 if (!base
&& index
&& scale
== 2)
9749 base
= index
, base_reg
= index_reg
, scale
= 1;
9751 /* Special case: scaling cannot be encoded without base or displacement. */
9752 if (!base
&& !disp
&& index
&& scale
!= 1)
9764 /* Return cost of the memory address x.
9765 For i386, it is better to use a complex address than let gcc copy
9766 the address into a reg and make a new pseudo. But not if the address
9767 requires to two regs - that would mean more pseudos with longer
9770 ix86_address_cost (rtx x
, machine_mode
, addr_space_t
, bool)
9772 struct ix86_address parts
;
9774 int ok
= ix86_decompose_address (x
, &parts
);
9778 if (parts
.base
&& SUBREG_P (parts
.base
))
9779 parts
.base
= SUBREG_REG (parts
.base
);
9780 if (parts
.index
&& SUBREG_P (parts
.index
))
9781 parts
.index
= SUBREG_REG (parts
.index
);
9783 /* Attempt to minimize number of registers in the address by increasing
9784 address cost for each used register. We don't increase address cost
9785 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
9786 is not invariant itself it most likely means that base or index is not
9787 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
9788 which is not profitable for x86. */
9790 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
9791 && (current_pass
->type
== GIMPLE_PASS
9792 || !pic_offset_table_rtx
9793 || !REG_P (parts
.base
)
9794 || REGNO (pic_offset_table_rtx
) != REGNO (parts
.base
)))
9798 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
9799 && (current_pass
->type
== GIMPLE_PASS
9800 || !pic_offset_table_rtx
9801 || !REG_P (parts
.index
)
9802 || REGNO (pic_offset_table_rtx
) != REGNO (parts
.index
)))
9805 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
9806 since it's predecode logic can't detect the length of instructions
9807 and it degenerates to vector decoded. Increase cost of such
9808 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
9809 to split such addresses or even refuse such addresses at all.
9811 Following addressing modes are affected:
9816 The first and last case may be avoidable by explicitly coding the zero in
9817 memory address, but I don't have AMD-K6 machine handy to check this
9821 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
9822 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
9823 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
9829 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
9830 this is used for to form addresses to local data when -fPIC is in
9834 darwin_local_data_pic (rtx disp
)
9836 return (GET_CODE (disp
) == UNSPEC
9837 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
9840 /* True if operand X should be loaded from GOT. */
9843 ix86_force_load_from_GOT_p (rtx x
)
9845 return ((TARGET_64BIT
|| HAVE_AS_IX86_GOT32X
)
9846 && !TARGET_PECOFF
&& !TARGET_MACHO
9848 && ix86_cmodel
!= CM_LARGE
9849 && GET_CODE (x
) == SYMBOL_REF
9850 && SYMBOL_REF_FUNCTION_P (x
)
9852 || (SYMBOL_REF_DECL (x
)
9853 && lookup_attribute ("noplt",
9854 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x
)))))
9855 && !SYMBOL_REF_LOCAL_P (x
));
9858 /* Determine if a given RTX is a valid constant. We already know this
9859 satisfies CONSTANT_P. */
9862 ix86_legitimate_constant_p (machine_mode mode
, rtx x
)
9864 switch (GET_CODE (x
))
9869 if (GET_CODE (x
) == PLUS
)
9871 if (!CONST_INT_P (XEXP (x
, 1)))
9876 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
9879 /* Only some unspecs are valid as "constants". */
9880 if (GET_CODE (x
) == UNSPEC
)
9881 switch (XINT (x
, 1))
9886 return TARGET_64BIT
;
9889 x
= XVECEXP (x
, 0, 0);
9890 return (GET_CODE (x
) == SYMBOL_REF
9891 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
9893 x
= XVECEXP (x
, 0, 0);
9894 return (GET_CODE (x
) == SYMBOL_REF
9895 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
9900 /* We must have drilled down to a symbol. */
9901 if (GET_CODE (x
) == LABEL_REF
)
9903 if (GET_CODE (x
) != SYMBOL_REF
)
9908 /* TLS symbols are never valid. */
9909 if (SYMBOL_REF_TLS_MODEL (x
))
9912 /* DLLIMPORT symbols are never valid. */
9913 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9914 && SYMBOL_REF_DLLIMPORT_P (x
))
9918 /* mdynamic-no-pic */
9919 if (MACHO_DYNAMIC_NO_PIC_P
)
9920 return machopic_symbol_defined_p (x
);
9923 /* External function address should be loaded
9924 via the GOT slot to avoid PLT. */
9925 if (ix86_force_load_from_GOT_p (x
))
9930 CASE_CONST_SCALAR_INT
:
9939 if (!standard_sse_constant_p (x
, mode
))
9947 if (!standard_sse_constant_p (x
, mode
))
9954 /* Otherwise we handle everything else in the move patterns. */
9958 /* Determine if it's legal to put X into the constant pool. This
9959 is not possible for the address of thread-local symbols, which
9960 is checked above. */
9963 ix86_cannot_force_const_mem (machine_mode mode
, rtx x
)
9965 /* We can put any immediate constant in memory. */
9966 switch (GET_CODE (x
))
9975 return !ix86_legitimate_constant_p (mode
, x
);
9978 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
9982 is_imported_p (rtx x
)
9984 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
9985 || GET_CODE (x
) != SYMBOL_REF
)
9988 return SYMBOL_REF_DLLIMPORT_P (x
) || SYMBOL_REF_STUBVAR_P (x
);
9992 /* Nonzero if the constant value X is a legitimate general operand
9993 when generating PIC code. It is given that flag_pic is on and
9994 that X satisfies CONSTANT_P. */
9997 legitimate_pic_operand_p (rtx x
)
10001 switch (GET_CODE (x
))
10004 inner
= XEXP (x
, 0);
10005 if (GET_CODE (inner
) == PLUS
10006 && CONST_INT_P (XEXP (inner
, 1)))
10007 inner
= XEXP (inner
, 0);
10009 /* Only some unspecs are valid as "constants". */
10010 if (GET_CODE (inner
) == UNSPEC
)
10011 switch (XINT (inner
, 1))
10014 case UNSPEC_GOTOFF
:
10015 case UNSPEC_PLTOFF
:
10016 return TARGET_64BIT
;
10018 x
= XVECEXP (inner
, 0, 0);
10019 return (GET_CODE (x
) == SYMBOL_REF
10020 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
10021 case UNSPEC_MACHOPIC_OFFSET
:
10022 return legitimate_pic_address_disp_p (x
);
10030 return legitimate_pic_address_disp_p (x
);
10037 /* Determine if a given CONST RTX is a valid memory displacement
10041 legitimate_pic_address_disp_p (rtx disp
)
10045 /* In 64bit mode we can allow direct addresses of symbols and labels
10046 when they are not dynamic symbols. */
10049 rtx op0
= disp
, op1
;
10051 switch (GET_CODE (disp
))
10057 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
10059 op0
= XEXP (XEXP (disp
, 0), 0);
10060 op1
= XEXP (XEXP (disp
, 0), 1);
10061 if (!CONST_INT_P (op1
))
10063 if (GET_CODE (op0
) == UNSPEC
10064 && (XINT (op0
, 1) == UNSPEC_DTPOFF
10065 || XINT (op0
, 1) == UNSPEC_NTPOFF
)
10066 && trunc_int_for_mode (INTVAL (op1
), SImode
) == INTVAL (op1
))
10068 if (INTVAL (op1
) >= 16*1024*1024
10069 || INTVAL (op1
) < -16*1024*1024)
10071 if (GET_CODE (op0
) == LABEL_REF
)
10073 if (GET_CODE (op0
) == CONST
10074 && GET_CODE (XEXP (op0
, 0)) == UNSPEC
10075 && XINT (XEXP (op0
, 0), 1) == UNSPEC_PCREL
)
10077 if (GET_CODE (op0
) == UNSPEC
10078 && XINT (op0
, 1) == UNSPEC_PCREL
)
10080 if (GET_CODE (op0
) != SYMBOL_REF
)
10085 /* TLS references should always be enclosed in UNSPEC.
10086 The dllimported symbol needs always to be resolved. */
10087 if (SYMBOL_REF_TLS_MODEL (op0
)
10088 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& SYMBOL_REF_DLLIMPORT_P (op0
)))
10093 if (is_imported_p (op0
))
10096 if (SYMBOL_REF_FAR_ADDR_P (op0
)
10097 || !SYMBOL_REF_LOCAL_P (op0
))
10100 /* Function-symbols need to be resolved only for
10102 For the small-model we don't need to resolve anything
10104 if ((ix86_cmodel
!= CM_LARGE_PIC
10105 && SYMBOL_REF_FUNCTION_P (op0
))
10106 || ix86_cmodel
== CM_SMALL_PIC
)
10108 /* Non-external symbols don't need to be resolved for
10109 large, and medium-model. */
10110 if ((ix86_cmodel
== CM_LARGE_PIC
10111 || ix86_cmodel
== CM_MEDIUM_PIC
)
10112 && !SYMBOL_REF_EXTERNAL_P (op0
))
10115 else if (!SYMBOL_REF_FAR_ADDR_P (op0
)
10116 && (SYMBOL_REF_LOCAL_P (op0
)
10117 || (HAVE_LD_PIE_COPYRELOC
10119 && !SYMBOL_REF_WEAK (op0
)
10120 && !SYMBOL_REF_FUNCTION_P (op0
)))
10121 && ix86_cmodel
!= CM_LARGE_PIC
)
10129 if (GET_CODE (disp
) != CONST
)
10131 disp
= XEXP (disp
, 0);
10135 /* We are unsafe to allow PLUS expressions. This limit allowed distance
10136 of GOT tables. We should not need these anyway. */
10137 if (GET_CODE (disp
) != UNSPEC
10138 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
10139 && XINT (disp
, 1) != UNSPEC_GOTOFF
10140 && XINT (disp
, 1) != UNSPEC_PCREL
10141 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
10144 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
10145 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
10151 if (GET_CODE (disp
) == PLUS
)
10153 if (!CONST_INT_P (XEXP (disp
, 1)))
10155 disp
= XEXP (disp
, 0);
10159 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
10162 if (GET_CODE (disp
) != UNSPEC
)
10165 switch (XINT (disp
, 1))
10170 /* We need to check for both symbols and labels because VxWorks loads
10171 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10173 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
10174 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
10175 case UNSPEC_GOTOFF
:
10176 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10177 While ABI specify also 32bit relocation but we don't produce it in
10178 small PIC model at all. */
10179 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
10180 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
10182 return !TARGET_PECOFF
&& gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
10184 case UNSPEC_GOTTPOFF
:
10185 case UNSPEC_GOTNTPOFF
:
10186 case UNSPEC_INDNTPOFF
:
10189 disp
= XVECEXP (disp
, 0, 0);
10190 return (GET_CODE (disp
) == SYMBOL_REF
10191 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
10192 case UNSPEC_NTPOFF
:
10193 disp
= XVECEXP (disp
, 0, 0);
10194 return (GET_CODE (disp
) == SYMBOL_REF
10195 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
10196 case UNSPEC_DTPOFF
:
10197 disp
= XVECEXP (disp
, 0, 0);
10198 return (GET_CODE (disp
) == SYMBOL_REF
10199 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
10205 /* Determine if op is suitable RTX for an address register.
10206 Return naked register if a register or a register subreg is
10207 found, otherwise return NULL_RTX. */
10210 ix86_validate_address_register (rtx op
)
10212 machine_mode mode
= GET_MODE (op
);
10214 /* Only SImode or DImode registers can form the address. */
10215 if (mode
!= SImode
&& mode
!= DImode
)
10220 else if (SUBREG_P (op
))
10222 rtx reg
= SUBREG_REG (op
);
10227 mode
= GET_MODE (reg
);
10229 /* Don't allow SUBREGs that span more than a word. It can
10230 lead to spill failures when the register is one word out
10231 of a two word structure. */
10232 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
10235 /* Allow only SUBREGs of non-eliminable hard registers. */
10236 if (register_no_elim_operand (reg
, mode
))
10240 /* Op is not a register. */
10244 /* Recognizes RTL expressions that are valid memory addresses for an
10245 instruction. The MODE argument is the machine mode for the MEM
10246 expression that wants to use this address.
10248 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
10249 convert common non-canonical forms to canonical form so that they will
10253 ix86_legitimate_address_p (machine_mode
, rtx addr
, bool strict
)
10255 struct ix86_address parts
;
10256 rtx base
, index
, disp
;
10257 HOST_WIDE_INT scale
;
10260 if (ix86_decompose_address (addr
, &parts
) <= 0)
10261 /* Decomposition failed. */
10265 index
= parts
.index
;
10267 scale
= parts
.scale
;
10270 /* Validate base register. */
10273 rtx reg
= ix86_validate_address_register (base
);
10275 if (reg
== NULL_RTX
)
10278 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
10279 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
10280 /* Base is not valid. */
10284 /* Validate index register. */
10287 rtx reg
= ix86_validate_address_register (index
);
10289 if (reg
== NULL_RTX
)
10292 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
10293 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
10294 /* Index is not valid. */
10298 /* Index and base should have the same mode. */
10300 && GET_MODE (base
) != GET_MODE (index
))
10303 /* Address override works only on the (%reg) part of %fs:(%reg). */
10304 if (seg
!= ADDR_SPACE_GENERIC
10305 && ((base
&& GET_MODE (base
) != word_mode
)
10306 || (index
&& GET_MODE (index
) != word_mode
)))
10309 /* Validate scale factor. */
10313 /* Scale without index. */
10316 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
10317 /* Scale is not a valid multiplier. */
10321 /* Validate displacement. */
10324 if (GET_CODE (disp
) == CONST
10325 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
10326 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
10327 switch (XINT (XEXP (disp
, 0), 1))
10329 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
10330 when used. While ABI specify also 32bit relocations, we
10331 don't produce them at all and use IP relative instead.
10332 Allow GOT in 32bit mode for both PIC and non-PIC if symbol
10333 should be loaded via GOT. */
10336 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp
, 0), 0, 0)))
10337 goto is_legitimate_pic
;
10339 case UNSPEC_GOTOFF
:
10340 gcc_assert (flag_pic
);
10342 goto is_legitimate_pic
;
10344 /* 64bit address unspec. */
10347 case UNSPEC_GOTPCREL
:
10348 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp
, 0), 0, 0)))
10349 goto is_legitimate_pic
;
10352 gcc_assert (flag_pic
);
10353 goto is_legitimate_pic
;
10355 case UNSPEC_GOTTPOFF
:
10356 case UNSPEC_GOTNTPOFF
:
10357 case UNSPEC_INDNTPOFF
:
10358 case UNSPEC_NTPOFF
:
10359 case UNSPEC_DTPOFF
:
10363 /* Invalid address unspec. */
10367 else if (SYMBOLIC_CONST (disp
)
10371 && MACHOPIC_INDIRECT
10372 && !machopic_operand_p (disp
)
10378 if (TARGET_64BIT
&& (index
|| base
))
10380 /* foo@dtpoff(%rX) is ok. */
10381 if (GET_CODE (disp
) != CONST
10382 || GET_CODE (XEXP (disp
, 0)) != PLUS
10383 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
10384 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
10385 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
10386 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
10387 /* Non-constant pic memory reference. */
10390 else if ((!TARGET_MACHO
|| flag_pic
)
10391 && ! legitimate_pic_address_disp_p (disp
))
10392 /* Displacement is an invalid pic construct. */
10395 else if (MACHO_DYNAMIC_NO_PIC_P
10396 && !ix86_legitimate_constant_p (Pmode
, disp
))
10397 /* displacment must be referenced via non_lazy_pointer */
10401 /* This code used to verify that a symbolic pic displacement
10402 includes the pic_offset_table_rtx register.
10404 While this is good idea, unfortunately these constructs may
10405 be created by "adds using lea" optimization for incorrect
10414 This code is nonsensical, but results in addressing
10415 GOT table with pic_offset_table_rtx base. We can't
10416 just refuse it easily, since it gets matched by
10417 "addsi3" pattern, that later gets split to lea in the
10418 case output register differs from input. While this
10419 can be handled by separate addsi pattern for this case
10420 that never results in lea, this seems to be easier and
10421 correct fix for crash to disable this test. */
10423 else if (GET_CODE (disp
) != LABEL_REF
10424 && !CONST_INT_P (disp
)
10425 && (GET_CODE (disp
) != CONST
10426 || !ix86_legitimate_constant_p (Pmode
, disp
))
10427 && (GET_CODE (disp
) != SYMBOL_REF
10428 || !ix86_legitimate_constant_p (Pmode
, disp
)))
10429 /* Displacement is not constant. */
10431 else if (TARGET_64BIT
10432 && !x86_64_immediate_operand (disp
, VOIDmode
))
10433 /* Displacement is out of range. */
10435 /* In x32 mode, constant addresses are sign extended to 64bit, so
10436 we have to prevent addresses from 0x80000000 to 0xffffffff. */
10437 else if (TARGET_X32
&& !(index
|| base
)
10438 && CONST_INT_P (disp
)
10439 && val_signbit_known_set_p (SImode
, INTVAL (disp
)))
10443 /* Everything looks valid. */
10447 /* Determine if a given RTX is a valid constant address. */
10450 constant_address_p (rtx x
)
10452 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
10455 /* Return a unique alias set for the GOT. */
10458 ix86_GOT_alias_set (void)
10460 static alias_set_type set
= -1;
10462 set
= new_alias_set ();
10466 /* Return a legitimate reference for ORIG (an address) using the
10467 register REG. If REG is 0, a new pseudo is generated.
10469 There are two types of references that must be handled:
10471 1. Global data references must load the address from the GOT, via
10472 the PIC reg. An insn is emitted to do this load, and the reg is
10475 2. Static data references, constant pool addresses, and code labels
10476 compute the address as an offset from the GOT, whose base is in
10477 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
10478 differentiate them from global data objects. The returned
10479 address is the PIC reg + an unspec constant.
10481 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
10482 reg also appears in the address. */
10485 legitimize_pic_address (rtx orig
, rtx reg
)
10488 rtx new_rtx
= orig
;
10491 if (TARGET_MACHO
&& !TARGET_64BIT
)
10494 reg
= gen_reg_rtx (Pmode
);
10495 /* Use the generic Mach-O PIC machinery. */
10496 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
10500 if (TARGET_64BIT
&& TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
10502 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
10507 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
10509 else if ((!TARGET_64BIT
10510 || /* TARGET_64BIT && */ ix86_cmodel
!= CM_SMALL_PIC
)
10512 && gotoff_operand (addr
, Pmode
))
10514 /* This symbol may be referenced via a displacement
10515 from the PIC base address (@GOTOFF). */
10516 if (GET_CODE (addr
) == CONST
)
10517 addr
= XEXP (addr
, 0);
10519 if (GET_CODE (addr
) == PLUS
)
10521 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
10523 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
10526 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
10528 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
10531 new_rtx
= copy_to_suggested_reg (new_rtx
, reg
, Pmode
);
10535 gcc_assert (REG_P (reg
));
10536 new_rtx
= expand_simple_binop (Pmode
, PLUS
, pic_offset_table_rtx
,
10537 new_rtx
, reg
, 1, OPTAB_DIRECT
);
10540 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
10542 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
10543 /* We can't use @GOTOFF for text labels
10544 on VxWorks, see gotoff_operand. */
10545 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
10547 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
10551 /* For x64 PE-COFF there is no GOT table,
10552 so we use address directly. */
10553 if (TARGET_64BIT
&& TARGET_PECOFF
)
10555 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
10556 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
10558 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
10560 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
),
10562 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
10563 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
10564 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
10568 /* This symbol must be referenced via a load
10569 from the Global Offset Table (@GOT). */
10570 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
10571 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
10573 new_rtx
= force_reg (Pmode
, new_rtx
);
10574 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
10575 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
10576 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
10579 new_rtx
= copy_to_suggested_reg (new_rtx
, reg
, Pmode
);
10583 if (CONST_INT_P (addr
)
10584 && !x86_64_immediate_operand (addr
, VOIDmode
))
10585 new_rtx
= copy_to_suggested_reg (addr
, reg
, Pmode
);
10586 else if (GET_CODE (addr
) == CONST
)
10588 addr
= XEXP (addr
, 0);
10590 /* We must match stuff we generate before. Assume the only
10591 unspecs that can get here are ours. Not that we could do
10592 anything with them anyway.... */
10593 if (GET_CODE (addr
) == UNSPEC
10594 || (GET_CODE (addr
) == PLUS
10595 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
10597 gcc_assert (GET_CODE (addr
) == PLUS
);
10600 if (GET_CODE (addr
) == PLUS
)
10602 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
10604 /* Check first to see if this is a constant
10605 offset from a @GOTOFF symbol reference. */
10607 && gotoff_operand (op0
, Pmode
)
10608 && CONST_INT_P (op1
))
10612 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
10614 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
10615 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
10619 gcc_assert (REG_P (reg
));
10620 new_rtx
= expand_simple_binop (Pmode
, PLUS
,
10621 pic_offset_table_rtx
,
10627 = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
10631 if (INTVAL (op1
) < -16*1024*1024
10632 || INTVAL (op1
) >= 16*1024*1024)
10634 if (!x86_64_immediate_operand (op1
, Pmode
))
10635 op1
= force_reg (Pmode
, op1
);
10638 = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
10644 rtx base
= legitimize_pic_address (op0
, reg
);
10645 machine_mode mode
= GET_MODE (base
);
10647 = legitimize_pic_address (op1
, base
== reg
? NULL_RTX
: reg
);
10649 if (CONST_INT_P (new_rtx
))
10651 if (INTVAL (new_rtx
) < -16*1024*1024
10652 || INTVAL (new_rtx
) >= 16*1024*1024)
10654 if (!x86_64_immediate_operand (new_rtx
, mode
))
10655 new_rtx
= force_reg (mode
, new_rtx
);
10658 = gen_rtx_PLUS (mode
, force_reg (mode
, base
), new_rtx
);
10661 new_rtx
= plus_constant (mode
, base
, INTVAL (new_rtx
));
10665 /* For %rip addressing, we have to use
10666 just disp32, not base nor index. */
10668 && (GET_CODE (base
) == SYMBOL_REF
10669 || GET_CODE (base
) == LABEL_REF
))
10670 base
= force_reg (mode
, base
);
10671 if (GET_CODE (new_rtx
) == PLUS
10672 && CONSTANT_P (XEXP (new_rtx
, 1)))
10674 base
= gen_rtx_PLUS (mode
, base
, XEXP (new_rtx
, 0));
10675 new_rtx
= XEXP (new_rtx
, 1);
10677 new_rtx
= gen_rtx_PLUS (mode
, base
, new_rtx
);
10685 /* Load the thread pointer. If TO_REG is true, force it into a register. */
10688 get_thread_pointer (machine_mode tp_mode
, bool to_reg
)
10690 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
10692 if (GET_MODE (tp
) != tp_mode
)
10694 gcc_assert (GET_MODE (tp
) == SImode
);
10695 gcc_assert (tp_mode
== DImode
);
10697 tp
= gen_rtx_ZERO_EXTEND (tp_mode
, tp
);
10701 tp
= copy_to_mode_reg (tp_mode
, tp
);
10706 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10708 static GTY(()) rtx ix86_tls_symbol
;
10711 ix86_tls_get_addr (void)
10713 if (!ix86_tls_symbol
)
10716 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
10717 ? "___tls_get_addr" : "__tls_get_addr");
10719 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
10722 if (ix86_cmodel
== CM_LARGE_PIC
&& !TARGET_PECOFF
)
10724 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, ix86_tls_symbol
),
10726 return gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
,
10727 gen_rtx_CONST (Pmode
, unspec
));
10730 return ix86_tls_symbol
;
10733 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
10735 static GTY(()) rtx ix86_tls_module_base_symbol
;
10738 ix86_tls_module_base (void)
10740 if (!ix86_tls_module_base_symbol
)
10742 ix86_tls_module_base_symbol
10743 = gen_rtx_SYMBOL_REF (Pmode
, "_TLS_MODULE_BASE_");
10745 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
10746 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
10749 return ix86_tls_module_base_symbol
;
10752 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
10753 false if we expect this to be used for a memory address and true if
10754 we expect to load the address into a register. */
10757 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
10759 rtx dest
, base
, off
;
10760 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
10761 machine_mode tp_mode
= Pmode
;
10764 /* Fall back to global dynamic model if tool chain cannot support local
10766 if (TARGET_SUN_TLS
&& !TARGET_64BIT
10767 && !HAVE_AS_IX86_TLSLDMPLT
&& !HAVE_AS_IX86_TLSLDM
10768 && model
== TLS_MODEL_LOCAL_DYNAMIC
)
10769 model
= TLS_MODEL_GLOBAL_DYNAMIC
;
10773 case TLS_MODEL_GLOBAL_DYNAMIC
:
10774 dest
= gen_reg_rtx (Pmode
);
10778 if (flag_pic
&& !TARGET_PECOFF
)
10779 pic
= pic_offset_table_rtx
;
10782 pic
= gen_reg_rtx (Pmode
);
10783 emit_insn (gen_set_got (pic
));
10787 if (TARGET_GNU2_TLS
)
10790 emit_insn (gen_tls_dynamic_gnu2_64 (dest
, x
));
10792 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
10794 tp
= get_thread_pointer (Pmode
, true);
10795 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
10797 if (GET_MODE (x
) != Pmode
)
10798 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
10800 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
10804 rtx caddr
= ix86_tls_get_addr ();
10808 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
10813 (ix86_gen_tls_global_dynamic_64 (rax
, x
, caddr
));
10814 insns
= get_insns ();
10817 if (GET_MODE (x
) != Pmode
)
10818 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
10820 RTL_CONST_CALL_P (insns
) = 1;
10821 emit_libcall_block (insns
, dest
, rax
, x
);
10824 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
10828 case TLS_MODEL_LOCAL_DYNAMIC
:
10829 base
= gen_reg_rtx (Pmode
);
10834 pic
= pic_offset_table_rtx
;
10837 pic
= gen_reg_rtx (Pmode
);
10838 emit_insn (gen_set_got (pic
));
10842 if (TARGET_GNU2_TLS
)
10844 rtx tmp
= ix86_tls_module_base ();
10847 emit_insn (gen_tls_dynamic_gnu2_64 (base
, tmp
));
10849 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
10851 tp
= get_thread_pointer (Pmode
, true);
10852 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
10853 gen_rtx_MINUS (Pmode
, tmp
, tp
));
10857 rtx caddr
= ix86_tls_get_addr ();
10861 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
10867 (ix86_gen_tls_local_dynamic_base_64 (rax
, caddr
));
10868 insns
= get_insns ();
10871 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
10872 share the LD_BASE result with other LD model accesses. */
10873 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
10874 UNSPEC_TLS_LD_BASE
);
10876 RTL_CONST_CALL_P (insns
) = 1;
10877 emit_libcall_block (insns
, base
, rax
, eqv
);
10880 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
10883 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
10884 off
= gen_rtx_CONST (Pmode
, off
);
10886 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
10888 if (TARGET_GNU2_TLS
)
10890 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
10892 if (GET_MODE (x
) != Pmode
)
10893 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
10895 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
10899 case TLS_MODEL_INITIAL_EXEC
:
10902 if (TARGET_SUN_TLS
&& !TARGET_X32
)
10904 /* The Sun linker took the AMD64 TLS spec literally
10905 and can only handle %rax as destination of the
10906 initial executable code sequence. */
10908 dest
= gen_reg_rtx (DImode
);
10909 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
10913 /* Generate DImode references to avoid %fs:(%reg32)
10914 problems and linker IE->LE relaxation bug. */
10917 type
= UNSPEC_GOTNTPOFF
;
10921 pic
= pic_offset_table_rtx
;
10922 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
10924 else if (!TARGET_ANY_GNU_TLS
)
10926 pic
= gen_reg_rtx (Pmode
);
10927 emit_insn (gen_set_got (pic
));
10928 type
= UNSPEC_GOTTPOFF
;
10933 type
= UNSPEC_INDNTPOFF
;
10936 off
= gen_rtx_UNSPEC (tp_mode
, gen_rtvec (1, x
), type
);
10937 off
= gen_rtx_CONST (tp_mode
, off
);
10939 off
= gen_rtx_PLUS (tp_mode
, pic
, off
);
10940 off
= gen_const_mem (tp_mode
, off
);
10941 set_mem_alias_set (off
, ix86_GOT_alias_set ());
10943 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
10945 base
= get_thread_pointer (tp_mode
,
10946 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
10947 off
= force_reg (tp_mode
, off
);
10948 dest
= gen_rtx_PLUS (tp_mode
, base
, off
);
10949 if (tp_mode
!= Pmode
)
10950 dest
= convert_to_mode (Pmode
, dest
, 1);
10954 base
= get_thread_pointer (Pmode
, true);
10955 dest
= gen_reg_rtx (Pmode
);
10956 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
10960 case TLS_MODEL_LOCAL_EXEC
:
10961 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
10962 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
10963 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
10964 off
= gen_rtx_CONST (Pmode
, off
);
10966 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
10968 base
= get_thread_pointer (Pmode
,
10969 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
10970 return gen_rtx_PLUS (Pmode
, base
, off
);
10974 base
= get_thread_pointer (Pmode
, true);
10975 dest
= gen_reg_rtx (Pmode
);
10976 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
10981 gcc_unreachable ();
10987 /* Return true if OP refers to a TLS address. */
10989 ix86_tls_address_pattern_p (rtx op
)
10991 subrtx_var_iterator::array_type array
;
10992 FOR_EACH_SUBRTX_VAR (iter
, array
, op
, ALL
)
10997 rtx
*x
= &XEXP (op
, 0);
10998 while (GET_CODE (*x
) == PLUS
)
11001 for (i
= 0; i
< 2; i
++)
11003 rtx u
= XEXP (*x
, i
);
11004 if (GET_CODE (u
) == ZERO_EXTEND
)
11006 if (GET_CODE (u
) == UNSPEC
11007 && XINT (u
, 1) == UNSPEC_TP
)
11013 iter
.skip_subrtxes ();
11020 /* Rewrite *LOC so that it refers to a default TLS address space. */
11022 ix86_rewrite_tls_address_1 (rtx
*loc
)
11024 subrtx_ptr_iterator::array_type array
;
11025 FOR_EACH_SUBRTX_PTR (iter
, array
, loc
, ALL
)
11030 rtx addr
= XEXP (*loc
, 0);
11032 while (GET_CODE (*x
) == PLUS
)
11035 for (i
= 0; i
< 2; i
++)
11037 rtx u
= XEXP (*x
, i
);
11038 if (GET_CODE (u
) == ZERO_EXTEND
)
11040 if (GET_CODE (u
) == UNSPEC
11041 && XINT (u
, 1) == UNSPEC_TP
)
11043 addr_space_t as
= DEFAULT_TLS_SEG_REG
;
11045 *x
= XEXP (*x
, 1 - i
);
11047 *loc
= replace_equiv_address_nv (*loc
, addr
, true);
11048 set_mem_addr_space (*loc
, as
);
11055 iter
.skip_subrtxes ();
11060 /* Rewrite instruction pattern involvning TLS address
11061 so that it refers to a default TLS address space. */
11063 ix86_rewrite_tls_address (rtx pattern
)
11065 pattern
= copy_insn (pattern
);
11066 ix86_rewrite_tls_address_1 (&pattern
);
11070 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11071 to symbol DECL if BEIMPORT is true. Otherwise create or return the
11072 unique refptr-DECL symbol corresponding to symbol DECL. */
11074 struct dllimport_hasher
: ggc_cache_ptr_hash
<tree_map
>
11076 static inline hashval_t
hash (tree_map
*m
) { return m
->hash
; }
11078 equal (tree_map
*a
, tree_map
*b
)
11080 return a
->base
.from
== b
->base
.from
;
11084 keep_cache_entry (tree_map
*&m
)
11086 return ggc_marked_p (m
->base
.from
);
11090 static GTY((cache
)) hash_table
<dllimport_hasher
> *dllimport_map
;
11093 get_dllimport_decl (tree decl
, bool beimport
)
11095 struct tree_map
*h
, in
;
11097 const char *prefix
;
11098 size_t namelen
, prefixlen
;
11103 if (!dllimport_map
)
11104 dllimport_map
= hash_table
<dllimport_hasher
>::create_ggc (512);
11106 in
.hash
= htab_hash_pointer (decl
);
11107 in
.base
.from
= decl
;
11108 tree_map
**loc
= dllimport_map
->find_slot_with_hash (&in
, in
.hash
, INSERT
);
11113 *loc
= h
= ggc_alloc
<tree_map
> ();
11115 h
->base
.from
= decl
;
11116 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
11117 VAR_DECL
, NULL
, ptr_type_node
);
11118 DECL_ARTIFICIAL (to
) = 1;
11119 DECL_IGNORED_P (to
) = 1;
11120 DECL_EXTERNAL (to
) = 1;
11121 TREE_READONLY (to
) = 1;
11123 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
11124 name
= targetm
.strip_name_encoding (name
);
11126 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
11127 ? "*__imp_" : "*__imp__";
11129 prefix
= user_label_prefix
[0] == 0 ? "*.refptr." : "*refptr.";
11130 namelen
= strlen (name
);
11131 prefixlen
= strlen (prefix
);
11132 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
11133 memcpy (imp_name
, prefix
, prefixlen
);
11134 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
11136 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
11137 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
11138 SET_SYMBOL_REF_DECL (rtl
, to
);
11139 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
| SYMBOL_FLAG_STUBVAR
;
11142 SYMBOL_REF_FLAGS (rtl
) |= SYMBOL_FLAG_EXTERNAL
;
11143 #ifdef SUB_TARGET_RECORD_STUB
11144 SUB_TARGET_RECORD_STUB (name
);
11148 rtl
= gen_const_mem (Pmode
, rtl
);
11149 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
11151 SET_DECL_RTL (to
, rtl
);
11152 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
11157 /* Expand SYMBOL into its corresponding far-address symbol.
11158 WANT_REG is true if we require the result be a register. */
11161 legitimize_pe_coff_extern_decl (rtx symbol
, bool want_reg
)
11166 gcc_assert (SYMBOL_REF_DECL (symbol
));
11167 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), false);
11169 x
= DECL_RTL (imp_decl
);
11171 x
= force_reg (Pmode
, x
);
11175 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11176 true if we require the result be a register. */
11179 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
11184 gcc_assert (SYMBOL_REF_DECL (symbol
));
11185 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), true);
11187 x
= DECL_RTL (imp_decl
);
11189 x
= force_reg (Pmode
, x
);
11193 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
11194 is true if we require the result be a register. */
11197 legitimize_pe_coff_symbol (rtx addr
, bool inreg
)
11199 if (!TARGET_PECOFF
)
11202 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
11204 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
11205 return legitimize_dllimport_symbol (addr
, inreg
);
11206 if (GET_CODE (addr
) == CONST
11207 && GET_CODE (XEXP (addr
, 0)) == PLUS
11208 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
11209 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
11211 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), inreg
);
11212 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
11216 if (ix86_cmodel
!= CM_LARGE_PIC
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
11218 if (GET_CODE (addr
) == SYMBOL_REF
11219 && !is_imported_p (addr
)
11220 && SYMBOL_REF_EXTERNAL_P (addr
)
11221 && SYMBOL_REF_DECL (addr
))
11222 return legitimize_pe_coff_extern_decl (addr
, inreg
);
11224 if (GET_CODE (addr
) == CONST
11225 && GET_CODE (XEXP (addr
, 0)) == PLUS
11226 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
11227 && !is_imported_p (XEXP (XEXP (addr
, 0), 0))
11228 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr
, 0), 0))
11229 && SYMBOL_REF_DECL (XEXP (XEXP (addr
, 0), 0)))
11231 rtx t
= legitimize_pe_coff_extern_decl (XEXP (XEXP (addr
, 0), 0), inreg
);
11232 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
11237 /* Try machine-dependent ways of modifying an illegitimate address
11238 to be legitimate. If we find one, return the new, valid address.
11239 This macro is used in only one place: `memory_address' in explow.c.
11241 OLDX is the address as it was before break_out_memory_refs was called.
11242 In some cases it is useful to look at this to decide what needs to be done.
11244 It is always safe for this macro to do nothing. It exists to recognize
11245 opportunities to optimize the output.
11247 For the 80386, we handle X+REG by loading X into a register R and
11248 using R+REG. R will go in a general reg and indexing will be used.
11249 However, if REG is a broken-out memory address or multiplication,
11250 nothing needs to be done because REG can certainly go in a general reg.
11252 When -fpic is used, special handling is needed for symbolic references.
11253 See comments by legitimize_pic_address in i386.c for details. */
11256 ix86_legitimize_address (rtx x
, rtx
, machine_mode mode
)
11258 bool changed
= false;
11261 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
11263 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
11264 if (GET_CODE (x
) == CONST
11265 && GET_CODE (XEXP (x
, 0)) == PLUS
11266 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
11267 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
11269 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
11270 (enum tls_model
) log
, false);
11271 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
11274 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
11276 rtx tmp
= legitimize_pe_coff_symbol (x
, true);
11281 if (flag_pic
&& SYMBOLIC_CONST (x
))
11282 return legitimize_pic_address (x
, 0);
11285 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
11286 return machopic_indirect_data_reference (x
, 0);
11289 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
11290 if (GET_CODE (x
) == ASHIFT
11291 && CONST_INT_P (XEXP (x
, 1))
11292 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
11295 log
= INTVAL (XEXP (x
, 1));
11296 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
11297 GEN_INT (1 << log
));
11300 if (GET_CODE (x
) == PLUS
)
11302 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11304 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
11305 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
11306 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
11309 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
11310 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
11311 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
11312 GEN_INT (1 << log
));
11315 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
11316 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
11317 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
11320 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
11321 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
11322 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
11323 GEN_INT (1 << log
));
11326 /* Put multiply first if it isn't already. */
11327 if (GET_CODE (XEXP (x
, 1)) == MULT
)
11329 std::swap (XEXP (x
, 0), XEXP (x
, 1));
11333 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11334 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11335 created by virtual register instantiation, register elimination, and
11336 similar optimizations. */
11337 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
11340 x
= gen_rtx_PLUS (Pmode
,
11341 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
11342 XEXP (XEXP (x
, 1), 0)),
11343 XEXP (XEXP (x
, 1), 1));
11347 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11348 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
11349 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
11350 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
11351 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
11352 && CONSTANT_P (XEXP (x
, 1)))
11355 rtx other
= NULL_RTX
;
11357 if (CONST_INT_P (XEXP (x
, 1)))
11359 constant
= XEXP (x
, 1);
11360 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
11362 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
11364 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
11365 other
= XEXP (x
, 1);
11373 x
= gen_rtx_PLUS (Pmode
,
11374 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
11375 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
11376 plus_constant (Pmode
, other
,
11377 INTVAL (constant
)));
11381 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
11384 if (GET_CODE (XEXP (x
, 0)) == MULT
)
11387 XEXP (x
, 0) = copy_addr_to_reg (XEXP (x
, 0));
11390 if (GET_CODE (XEXP (x
, 1)) == MULT
)
11393 XEXP (x
, 1) = copy_addr_to_reg (XEXP (x
, 1));
11397 && REG_P (XEXP (x
, 1))
11398 && REG_P (XEXP (x
, 0)))
11401 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
11404 x
= legitimize_pic_address (x
, 0);
11407 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
11410 if (REG_P (XEXP (x
, 0)))
11412 rtx temp
= gen_reg_rtx (Pmode
);
11413 rtx val
= force_operand (XEXP (x
, 1), temp
);
11416 val
= convert_to_mode (Pmode
, val
, 1);
11417 emit_move_insn (temp
, val
);
11420 XEXP (x
, 1) = temp
;
11424 else if (REG_P (XEXP (x
, 1)))
11426 rtx temp
= gen_reg_rtx (Pmode
);
11427 rtx val
= force_operand (XEXP (x
, 0), temp
);
11430 val
= convert_to_mode (Pmode
, val
, 1);
11431 emit_move_insn (temp
, val
);
11434 XEXP (x
, 0) = temp
;
11442 /* Print an integer constant expression in assembler syntax. Addition
11443 and subtraction are the only arithmetic that may appear in these
11444 expressions. FILE is the stdio stream to write to, X is the rtx, and
11445 CODE is the operand print code from the output string. */
11448 output_pic_addr_const (FILE *file
, rtx x
, int code
)
11452 switch (GET_CODE (x
))
11455 gcc_assert (flag_pic
);
11460 if (TARGET_64BIT
|| ! TARGET_MACHO_BRANCH_ISLANDS
)
11461 output_addr_const (file
, x
);
11464 const char *name
= XSTR (x
, 0);
11466 /* Mark the decl as referenced so that cgraph will
11467 output the function. */
11468 if (SYMBOL_REF_DECL (x
))
11469 mark_decl_referenced (SYMBOL_REF_DECL (x
));
11472 if (MACHOPIC_INDIRECT
11473 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
11474 name
= machopic_indirection_name (x
, /*stub_p=*/true);
11476 assemble_name (file
, name
);
11478 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& TARGET_PECOFF
)
11479 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
11480 fputs ("@PLT", file
);
11487 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
11488 assemble_name (asm_out_file
, buf
);
11492 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
11496 /* This used to output parentheses around the expression,
11497 but that does not work on the 386 (either ATT or BSD assembler). */
11498 output_pic_addr_const (file
, XEXP (x
, 0), code
);
11502 /* We can't handle floating point constants;
11503 TARGET_PRINT_OPERAND must handle them. */
11504 output_operand_lossage ("floating constant misused");
11508 /* Some assemblers need integer constants to appear first. */
11509 if (CONST_INT_P (XEXP (x
, 0)))
11511 output_pic_addr_const (file
, XEXP (x
, 0), code
);
11513 output_pic_addr_const (file
, XEXP (x
, 1), code
);
11517 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
11518 output_pic_addr_const (file
, XEXP (x
, 1), code
);
11520 output_pic_addr_const (file
, XEXP (x
, 0), code
);
11526 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
11527 output_pic_addr_const (file
, XEXP (x
, 0), code
);
11529 output_pic_addr_const (file
, XEXP (x
, 1), code
);
11531 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
11535 gcc_assert (XVECLEN (x
, 0) == 1);
11536 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
11537 switch (XINT (x
, 1))
11540 fputs ("@GOT", file
);
11542 case UNSPEC_GOTOFF
:
11543 fputs ("@GOTOFF", file
);
11545 case UNSPEC_PLTOFF
:
11546 fputs ("@PLTOFF", file
);
11549 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
11550 "(%rip)" : "[rip]", file
);
11552 case UNSPEC_GOTPCREL
:
11553 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
11554 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
11556 case UNSPEC_GOTTPOFF
:
11557 /* FIXME: This might be @TPOFF in Sun ld too. */
11558 fputs ("@gottpoff", file
);
11561 fputs ("@tpoff", file
);
11563 case UNSPEC_NTPOFF
:
11565 fputs ("@tpoff", file
);
11567 fputs ("@ntpoff", file
);
11569 case UNSPEC_DTPOFF
:
11570 fputs ("@dtpoff", file
);
11572 case UNSPEC_GOTNTPOFF
:
11574 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
11575 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
11577 fputs ("@gotntpoff", file
);
11579 case UNSPEC_INDNTPOFF
:
11580 fputs ("@indntpoff", file
);
11583 case UNSPEC_MACHOPIC_OFFSET
:
11585 machopic_output_function_base_name (file
);
11589 output_operand_lossage ("invalid UNSPEC as operand");
11595 output_operand_lossage ("invalid expression as operand");
11599 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11600 We need to emit DTP-relative relocations. */
11602 static void ATTRIBUTE_UNUSED
11603 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
11605 fputs (ASM_LONG
, file
);
11606 output_addr_const (file
, x
);
11607 fputs ("@dtpoff", file
);
11613 fputs (", 0", file
);
11616 gcc_unreachable ();
11620 /* Return true if X is a representation of the PIC register. This copes
11621 with calls from ix86_find_base_term, where the register might have
11622 been replaced by a cselib value. */
11625 ix86_pic_register_p (rtx x
)
11627 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
11628 return (pic_offset_table_rtx
11629 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
11630 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SET_GOT
)
11632 else if (!REG_P (x
))
11634 else if (pic_offset_table_rtx
)
11636 if (REGNO (x
) == REGNO (pic_offset_table_rtx
))
11638 if (HARD_REGISTER_P (x
)
11639 && !HARD_REGISTER_P (pic_offset_table_rtx
)
11640 && ORIGINAL_REGNO (x
) == REGNO (pic_offset_table_rtx
))
11645 return REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
11648 /* Helper function for ix86_delegitimize_address.
11649 Attempt to delegitimize TLS local-exec accesses. */
11652 ix86_delegitimize_tls_address (rtx orig_x
)
11654 rtx x
= orig_x
, unspec
;
11655 struct ix86_address addr
;
11657 if (!TARGET_TLS_DIRECT_SEG_REFS
)
11661 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
11663 if (ix86_decompose_address (x
, &addr
) == 0
11664 || addr
.seg
!= DEFAULT_TLS_SEG_REG
11665 || addr
.disp
== NULL_RTX
11666 || GET_CODE (addr
.disp
) != CONST
)
11668 unspec
= XEXP (addr
.disp
, 0);
11669 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
11670 unspec
= XEXP (unspec
, 0);
11671 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
11673 x
= XVECEXP (unspec
, 0, 0);
11674 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
11675 if (unspec
!= XEXP (addr
.disp
, 0))
11676 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
11679 rtx idx
= addr
.index
;
11680 if (addr
.scale
!= 1)
11681 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
11682 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
11685 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
11686 if (MEM_P (orig_x
))
11687 x
= replace_equiv_address_nv (orig_x
, x
);
11691 /* In the name of slightly smaller debug output, and to cater to
11692 general assembler lossage, recognize PIC+GOTOFF and turn it back
11693 into a direct symbol reference.
11695 On Darwin, this is necessary to avoid a crash, because Darwin
11696 has a different PIC label for each routine but the DWARF debugging
11697 information is not associated with any particular routine, so it's
11698 necessary to remove references to the PIC label from RTL stored by
11699 the DWARF output code.
11701 This helper is used in the normal ix86_delegitimize_address
11702 entrypoint (e.g. used in the target delegitimization hook) and
11703 in ix86_find_base_term. As compile time memory optimization, we
11704 avoid allocating rtxes that will not change anything on the outcome
11705 of the callers (find_base_value and find_base_term). */
11708 ix86_delegitimize_address_1 (rtx x
, bool base_term_p
)
11710 rtx orig_x
= delegitimize_mem_from_attrs (x
);
11711 /* addend is NULL or some rtx if x is something+GOTOFF where
11712 something doesn't include the PIC register. */
11713 rtx addend
= NULL_RTX
;
11714 /* reg_addend is NULL or a multiple of some register. */
11715 rtx reg_addend
= NULL_RTX
;
11716 /* const_addend is NULL or a const_int. */
11717 rtx const_addend
= NULL_RTX
;
11718 /* This is the result, or NULL. */
11719 rtx result
= NULL_RTX
;
11728 if (GET_CODE (x
) == CONST
11729 && GET_CODE (XEXP (x
, 0)) == PLUS
11730 && GET_MODE (XEXP (x
, 0)) == Pmode
11731 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
11732 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
11733 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
11735 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
11736 base. A CONST can't be arg_pointer_rtx based. */
11737 if (base_term_p
&& MEM_P (orig_x
))
11739 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
11740 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
11741 if (MEM_P (orig_x
))
11742 x
= replace_equiv_address_nv (orig_x
, x
);
11746 if (GET_CODE (x
) == CONST
11747 && GET_CODE (XEXP (x
, 0)) == UNSPEC
11748 && (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTPCREL
11749 || XINT (XEXP (x
, 0), 1) == UNSPEC_PCREL
)
11750 && (MEM_P (orig_x
) || XINT (XEXP (x
, 0), 1) == UNSPEC_PCREL
))
11752 x
= XVECEXP (XEXP (x
, 0), 0, 0);
11753 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
11755 x
= lowpart_subreg (GET_MODE (orig_x
), x
, GET_MODE (x
));
11762 if (ix86_cmodel
!= CM_MEDIUM_PIC
&& ix86_cmodel
!= CM_LARGE_PIC
)
11763 return ix86_delegitimize_tls_address (orig_x
);
11765 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
11766 and -mcmodel=medium -fpic. */
11769 if (GET_CODE (x
) != PLUS
11770 || GET_CODE (XEXP (x
, 1)) != CONST
)
11771 return ix86_delegitimize_tls_address (orig_x
);
11773 if (ix86_pic_register_p (XEXP (x
, 0)))
11774 /* %ebx + GOT/GOTOFF */
11776 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
11778 /* %ebx + %reg * scale + GOT/GOTOFF */
11779 reg_addend
= XEXP (x
, 0);
11780 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
11781 reg_addend
= XEXP (reg_addend
, 1);
11782 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
11783 reg_addend
= XEXP (reg_addend
, 0);
11786 reg_addend
= NULL_RTX
;
11787 addend
= XEXP (x
, 0);
11791 addend
= XEXP (x
, 0);
11793 x
= XEXP (XEXP (x
, 1), 0);
11794 if (GET_CODE (x
) == PLUS
11795 && CONST_INT_P (XEXP (x
, 1)))
11797 const_addend
= XEXP (x
, 1);
11801 if (GET_CODE (x
) == UNSPEC
11802 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
11803 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))
11804 || (XINT (x
, 1) == UNSPEC_PLTOFF
&& ix86_cmodel
== CM_LARGE_PIC
11805 && !MEM_P (orig_x
) && !addend
)))
11806 result
= XVECEXP (x
, 0, 0);
11808 if (!TARGET_64BIT
&& TARGET_MACHO
&& darwin_local_data_pic (x
)
11809 && !MEM_P (orig_x
))
11810 result
= XVECEXP (x
, 0, 0);
11813 return ix86_delegitimize_tls_address (orig_x
);
11815 /* For (PLUS something CONST_INT) both find_base_{value,term} just
11816 recurse on the first operand. */
11817 if (const_addend
&& !base_term_p
)
11818 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
11820 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
11823 /* If the rest of original X doesn't involve the PIC register, add
11824 addend and subtract pic_offset_table_rtx. This can happen e.g.
11826 leal (%ebx, %ecx, 4), %ecx
11828 movl foo@GOTOFF(%ecx), %edx
11829 in which case we return (%ecx - %ebx) + foo
11830 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
11831 and reload has completed. Don't do the latter for debug,
11832 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
11833 if (pic_offset_table_rtx
11834 && (!reload_completed
|| !ix86_use_pseudo_pic_reg ()))
11835 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
11836 pic_offset_table_rtx
),
11838 else if (base_term_p
11839 && pic_offset_table_rtx
11841 && !TARGET_VXWORKS_RTP
)
11843 rtx tmp
= gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
11844 tmp
= gen_rtx_MINUS (Pmode
, copy_rtx (addend
), tmp
);
11845 result
= gen_rtx_PLUS (Pmode
, tmp
, result
);
11850 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
11852 result
= lowpart_subreg (GET_MODE (orig_x
), result
, Pmode
);
11853 if (result
== NULL_RTX
)
11859 /* The normal instantiation of the above template. */
11862 ix86_delegitimize_address (rtx x
)
11864 return ix86_delegitimize_address_1 (x
, false);
11867 /* If X is a machine specific address (i.e. a symbol or label being
11868 referenced as a displacement from the GOT implemented using an
11869 UNSPEC), then return the base term. Otherwise return X. */
11872 ix86_find_base_term (rtx x
)
11878 if (GET_CODE (x
) != CONST
)
11880 term
= XEXP (x
, 0);
11881 if (GET_CODE (term
) == PLUS
11882 && CONST_INT_P (XEXP (term
, 1)))
11883 term
= XEXP (term
, 0);
11884 if (GET_CODE (term
) != UNSPEC
11885 || (XINT (term
, 1) != UNSPEC_GOTPCREL
11886 && XINT (term
, 1) != UNSPEC_PCREL
))
11889 return XVECEXP (term
, 0, 0);
11892 return ix86_delegitimize_address_1 (x
, true);
11895 /* Return true if X shouldn't be emitted into the debug info.
11896 Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
11897 symbol easily into the .debug_info section, so we need not to
11898 delegitimize, but instead assemble as @gotoff.
11899 Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
11900 assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */
11903 ix86_const_not_ok_for_debug_p (rtx x
)
11905 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) != UNSPEC_GOTOFF
)
11908 if (SYMBOL_REF_P (x
) && strcmp (XSTR (x
, 0), GOT_SYMBOL_NAME
) == 0)
11915 put_condition_code (enum rtx_code code
, machine_mode mode
, bool reverse
,
11916 bool fp
, FILE *file
)
11918 const char *suffix
;
11920 if (mode
== CCFPmode
)
11922 code
= ix86_fp_compare_code_to_integer (code
);
11926 code
= reverse_condition (code
);
11931 gcc_assert (mode
!= CCGZmode
);
11955 gcc_assert (mode
!= CCGZmode
);
11979 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
11983 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
11984 Those same assemblers have the same but opposite lossage on cmov. */
11985 if (mode
== CCmode
)
11986 suffix
= fp
? "nbe" : "a";
11988 gcc_unreachable ();
12005 gcc_unreachable ();
12009 if (mode
== CCmode
|| mode
== CCGZmode
)
12011 else if (mode
== CCCmode
)
12012 suffix
= fp
? "b" : "c";
12014 gcc_unreachable ();
12031 gcc_unreachable ();
12035 if (mode
== CCmode
|| mode
== CCGZmode
)
12037 else if (mode
== CCCmode
)
12038 suffix
= fp
? "nb" : "nc";
12040 gcc_unreachable ();
12043 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
12047 if (mode
== CCmode
)
12050 gcc_unreachable ();
12053 suffix
= fp
? "u" : "p";
12056 suffix
= fp
? "nu" : "np";
12059 gcc_unreachable ();
12061 fputs (suffix
, file
);
12064 /* Print the name of register X to FILE based on its machine mode and number.
12065 If CODE is 'w', pretend the mode is HImode.
12066 If CODE is 'b', pretend the mode is QImode.
12067 If CODE is 'k', pretend the mode is SImode.
12068 If CODE is 'q', pretend the mode is DImode.
12069 If CODE is 'x', pretend the mode is V4SFmode.
12070 If CODE is 't', pretend the mode is V8SFmode.
12071 If CODE is 'g', pretend the mode is V16SFmode.
12072 If CODE is 'h', pretend the reg is the 'high' byte register.
12073 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12074 If CODE is 'd', duplicate the operand for AVX instruction.
12075 If CODE is 'V', print naked full integer register name without %.
12079 print_reg (rtx x
, int code
, FILE *file
)
12083 unsigned int regno
;
12086 if (ASSEMBLER_DIALECT
== ASM_ATT
&& code
!= 'V')
12091 gcc_assert (TARGET_64BIT
);
12092 fputs ("rip", file
);
12096 if (code
== 'y' && STACK_TOP_P (x
))
12098 fputs ("st(0)", file
);
12104 else if (code
== 'b')
12106 else if (code
== 'k')
12108 else if (code
== 'q')
12110 else if (code
== 'h')
12112 else if (code
== 'x')
12114 else if (code
== 't')
12116 else if (code
== 'g')
12119 msize
= GET_MODE_SIZE (GET_MODE (x
));
12123 if (regno
== ARG_POINTER_REGNUM
12124 || regno
== FRAME_POINTER_REGNUM
12125 || regno
== FPSR_REG
)
12127 output_operand_lossage
12128 ("invalid use of register '%s'", reg_names
[regno
]);
12131 else if (regno
== FLAGS_REG
)
12133 output_operand_lossage ("invalid use of asm flag output");
12139 if (GENERAL_REGNO_P (regno
))
12140 msize
= GET_MODE_SIZE (word_mode
);
12142 error ("%<V%> modifier on non-integer register");
12145 duplicated
= code
== 'd' && TARGET_AVX
;
12152 if (GENERAL_REGNO_P (regno
) && msize
> GET_MODE_SIZE (word_mode
))
12153 warning (0, "unsupported size for integer register");
12156 if (LEGACY_INT_REGNO_P (regno
))
12157 putc (msize
> 4 && TARGET_64BIT
? 'r' : 'e', file
);
12161 reg
= hi_reg_name
[regno
];
12164 if (regno
>= ARRAY_SIZE (qi_reg_name
))
12166 if (!ANY_QI_REGNO_P (regno
))
12167 error ("unsupported size for integer register");
12168 reg
= qi_reg_name
[regno
];
12171 if (regno
>= ARRAY_SIZE (qi_high_reg_name
))
12173 reg
= qi_high_reg_name
[regno
];
12177 if (SSE_REGNO_P (regno
))
12179 gcc_assert (!duplicated
);
12180 putc (msize
== 32 ? 'y' : 'z', file
);
12181 reg
= hi_reg_name
[regno
] + 1;
12186 gcc_unreachable ();
12191 /* Irritatingly, AMD extended registers use
12192 different naming convention: "r%d[bwd]" */
12193 if (REX_INT_REGNO_P (regno
))
12195 gcc_assert (TARGET_64BIT
);
12199 error ("extended registers have no high halves");
12214 error ("unsupported operand size for extended register");
12222 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12223 fprintf (file
, ", %%%s", reg
);
12225 fprintf (file
, ", %s", reg
);
12229 /* Meaning of CODE:
12230 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12231 C -- print opcode suffix for set/cmov insn.
12232 c -- like C, but print reversed condition
12233 F,f -- likewise, but for floating-point.
12234 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12236 R -- print embedded rounding and sae.
12237 r -- print only sae.
12238 z -- print the opcode suffix for the size of the current operand.
12239 Z -- likewise, with special suffixes for x87 instructions.
12240 * -- print a star (in certain assembler syntax)
12241 A -- print an absolute memory reference.
12242 E -- print address with DImode register names if TARGET_64BIT.
12243 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12244 s -- print a shift double count, followed by the assemblers argument
12246 b -- print the QImode name of the register for the indicated operand.
12247 %b0 would print %al if operands[0] is reg 0.
12248 w -- likewise, print the HImode name of the register.
12249 k -- likewise, print the SImode name of the register.
12250 q -- likewise, print the DImode name of the register.
12251 x -- likewise, print the V4SFmode name of the register.
12252 t -- likewise, print the V8SFmode name of the register.
12253 g -- likewise, print the V16SFmode name of the register.
12254 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12255 y -- print "st(0)" instead of "st" as a register.
12256 d -- print duplicated register operand for AVX instruction.
12257 D -- print condition for SSE cmp instruction.
12258 P -- if PIC, print an @PLT suffix.
12259 p -- print raw symbol name.
12260 X -- don't print any sort of PIC '@' suffix for a symbol.
12261 & -- print some in-use local-dynamic symbol name.
12262 H -- print a memory address offset by 8; used for sse high-parts
12263 Y -- print condition for XOP pcom* instruction.
12264 V -- print naked full integer register name without %.
12265 + -- print a branch hint as 'cs' or 'ds' prefix
12266 ; -- print a semicolon (after prefixes due to bug in older gas).
12267 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
12268 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
12269 M -- print addr32 prefix for TARGET_X32 with VSIB address.
12270 ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
12274 ix86_print_operand (FILE *file
, rtx x
, int code
)
12281 switch (ASSEMBLER_DIALECT
)
12288 /* Intel syntax. For absolute addresses, registers should not
12289 be surrounded by braces. */
12293 ix86_print_operand (file
, x
, 0);
12300 gcc_unreachable ();
12303 ix86_print_operand (file
, x
, 0);
12307 /* Wrap address in an UNSPEC to declare special handling. */
12309 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
12311 output_address (VOIDmode
, x
);
12315 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12320 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12325 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12330 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12335 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12340 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12345 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12346 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
12349 switch (GET_MODE_SIZE (GET_MODE (x
)))
12364 output_operand_lossage ("invalid operand size for operand "
12374 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
12376 /* Opcodes don't get size suffixes if using Intel opcodes. */
12377 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
12380 switch (GET_MODE_SIZE (GET_MODE (x
)))
12399 output_operand_lossage ("invalid operand size for operand "
12405 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
12406 warning (0, "non-integer operand used with operand code %<z%>");
12410 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12411 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
12414 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
12416 switch (GET_MODE_SIZE (GET_MODE (x
)))
12419 #ifdef HAVE_AS_IX86_FILDS
12429 #ifdef HAVE_AS_IX86_FILDQ
12432 fputs ("ll", file
);
12440 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
12442 /* 387 opcodes don't get size suffixes
12443 if the operands are registers. */
12444 if (STACK_REG_P (x
))
12447 switch (GET_MODE_SIZE (GET_MODE (x
)))
12468 output_operand_lossage ("invalid operand type used with "
12469 "operand code 'Z'");
12473 output_operand_lossage ("invalid operand size for operand code 'Z'");
12493 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
12495 ix86_print_operand (file
, x
, 0);
12496 fputs (", ", file
);
12501 switch (GET_CODE (x
))
12504 fputs ("neq", file
);
12507 fputs ("eq", file
);
12511 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
12515 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
12519 fputs ("le", file
);
12523 fputs ("lt", file
);
12526 fputs ("unord", file
);
12529 fputs ("ord", file
);
12532 fputs ("ueq", file
);
12535 fputs ("nlt", file
);
12538 fputs ("nle", file
);
12541 fputs ("ule", file
);
12544 fputs ("ult", file
);
12547 fputs ("une", file
);
12550 output_operand_lossage ("operand is not a condition code, "
12551 "invalid operand code 'Y'");
12557 /* Little bit of braindamage here. The SSE compare instructions
12558 does use completely different names for the comparisons that the
12559 fp conditional moves. */
12560 switch (GET_CODE (x
))
12565 fputs ("eq_us", file
);
12570 fputs ("eq", file
);
12575 fputs ("nge", file
);
12580 fputs ("lt", file
);
12585 fputs ("ngt", file
);
12590 fputs ("le", file
);
12593 fputs ("unord", file
);
12598 fputs ("neq_oq", file
);
12603 fputs ("neq", file
);
12608 fputs ("ge", file
);
12613 fputs ("nlt", file
);
12618 fputs ("gt", file
);
12623 fputs ("nle", file
);
12626 fputs ("ord", file
);
12629 output_operand_lossage ("operand is not a condition code, "
12630 "invalid operand code 'D'");
12637 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12638 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12640 gcc_fallthrough ();
12645 if (!COMPARISON_P (x
))
12647 output_operand_lossage ("operand is not a condition code, "
12648 "invalid operand code '%c'", code
);
12651 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
12652 code
== 'c' || code
== 'f',
12653 code
== 'F' || code
== 'f',
12658 if (!offsettable_memref_p (x
))
12660 output_operand_lossage ("operand is not an offsettable memory "
12661 "reference, invalid operand code 'H'");
12664 /* It doesn't actually matter what mode we use here, as we're
12665 only going to use this for printing. */
12666 x
= adjust_address_nv (x
, DImode
, 8);
12667 /* Output 'qword ptr' for intel assembler dialect. */
12668 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
12673 if (!CONST_INT_P (x
))
12675 output_operand_lossage ("operand is not an integer, invalid "
12676 "operand code 'K'");
12680 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
12681 #ifdef HAVE_AS_IX86_HLE
12682 fputs ("xacquire ", file
);
12684 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
12686 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
12687 #ifdef HAVE_AS_IX86_HLE
12688 fputs ("xrelease ", file
);
12690 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
12692 /* We do not want to print value of the operand. */
12696 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
12697 fputs ("{z}", file
);
12701 if (!CONST_INT_P (x
) || INTVAL (x
) != ROUND_SAE
)
12703 output_operand_lossage ("operand is not a specific integer, "
12704 "invalid operand code 'r'");
12708 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
12709 fputs (", ", file
);
12711 fputs ("{sae}", file
);
12713 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12714 fputs (", ", file
);
12719 if (!CONST_INT_P (x
))
12721 output_operand_lossage ("operand is not an integer, invalid "
12722 "operand code 'R'");
12726 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
12727 fputs (", ", file
);
12729 switch (INTVAL (x
))
12731 case ROUND_NEAREST_INT
| ROUND_SAE
:
12732 fputs ("{rn-sae}", file
);
12734 case ROUND_NEG_INF
| ROUND_SAE
:
12735 fputs ("{rd-sae}", file
);
12737 case ROUND_POS_INF
| ROUND_SAE
:
12738 fputs ("{ru-sae}", file
);
12740 case ROUND_ZERO
| ROUND_SAE
:
12741 fputs ("{rz-sae}", file
);
12744 output_operand_lossage ("operand is not a specific integer, "
12745 "invalid operand code 'R'");
12748 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12749 fputs (", ", file
);
12754 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12760 const char *name
= get_some_local_dynamic_name ();
12762 output_operand_lossage ("'%%&' used without any "
12763 "local dynamic TLS references");
12765 assemble_name (file
, name
);
12774 || optimize_function_for_size_p (cfun
)
12775 || !TARGET_BRANCH_PREDICTION_HINTS
)
12778 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
12781 int pred_val
= profile_probability::from_reg_br_prob_note
12782 (XINT (x
, 0)).to_reg_br_prob_base ();
12784 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
12785 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
12787 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
12789 = final_forward_branch_p (current_output_insn
) == 0;
12791 /* Emit hints only in the case default branch prediction
12792 heuristics would fail. */
12793 if (taken
!= cputaken
)
12795 /* We use 3e (DS) prefix for taken branches and
12796 2e (CS) prefix for not taken branches. */
12798 fputs ("ds ; ", file
);
12800 fputs ("cs ; ", file
);
12808 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
12814 putc (TARGET_AVX2
? 'i' : 'f', file
);
12820 /* NB: 32-bit indices in VSIB address are sign-extended
12821 to 64 bits. In x32, if 32-bit address 0xf7fa3010 is
12822 sign-extended to 0xfffffffff7fa3010 which is invalid
12823 address. Add addr32 prefix if there is no base
12824 register nor symbol. */
12826 struct ix86_address parts
;
12827 ok
= ix86_decompose_address (x
, &parts
);
12828 gcc_assert (ok
&& parts
.index
== NULL_RTX
);
12829 if (parts
.base
== NULL_RTX
12830 && (parts
.disp
== NULL_RTX
12831 || !symbolic_operand (parts
.disp
,
12832 GET_MODE (parts
.disp
))))
12833 fputs ("addr32 ", file
);
12838 if (TARGET_64BIT
&& Pmode
!= word_mode
)
12839 fputs ("addr32 ", file
);
12843 if (ix86_notrack_prefixed_insn_p (current_output_insn
))
12844 fputs ("notrack ", file
);
12848 output_operand_lossage ("invalid operand code '%c'", code
);
12853 print_reg (x
, code
, file
);
12855 else if (MEM_P (x
))
12857 rtx addr
= XEXP (x
, 0);
12859 /* No `byte ptr' prefix for call instructions ... */
12860 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
12862 machine_mode mode
= GET_MODE (x
);
12865 /* Check for explicit size override codes. */
12868 else if (code
== 'w')
12870 else if (code
== 'k')
12872 else if (code
== 'q')
12874 else if (code
== 'x')
12876 else if (code
== 't')
12878 else if (code
== 'g')
12880 else if (mode
== BLKmode
)
12881 /* ... or BLKmode operands, when not overridden. */
12884 switch (GET_MODE_SIZE (mode
))
12886 case 1: size
= "BYTE"; break;
12887 case 2: size
= "WORD"; break;
12888 case 4: size
= "DWORD"; break;
12889 case 8: size
= "QWORD"; break;
12890 case 12: size
= "TBYTE"; break;
12892 if (mode
== XFmode
)
12897 case 32: size
= "YMMWORD"; break;
12898 case 64: size
= "ZMMWORD"; break;
12900 gcc_unreachable ();
12904 fputs (size
, file
);
12905 fputs (" PTR ", file
);
12909 if (this_is_asm_operands
&& ! address_operand (addr
, VOIDmode
))
12910 output_operand_lossage ("invalid constraints for operand");
12912 ix86_print_operand_address_as
12913 (file
, addr
, MEM_ADDR_SPACE (x
), code
== 'p' || code
== 'P');
12916 else if (CONST_DOUBLE_P (x
) && GET_MODE (x
) == SFmode
)
12920 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
12922 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12924 /* Sign extend 32bit SFmode immediate to 8 bytes. */
12926 fprintf (file
, "0x%08" HOST_LONG_LONG_FORMAT
"x",
12927 (unsigned long long) (int) l
);
12929 fprintf (file
, "0x%08x", (unsigned int) l
);
12932 else if (CONST_DOUBLE_P (x
) && GET_MODE (x
) == DFmode
)
12936 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
12938 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12940 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
12943 /* These float cases don't actually occur as immediate operands. */
12944 else if (CONST_DOUBLE_P (x
) && GET_MODE (x
) == XFmode
)
12948 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
12949 fputs (dstr
, file
);
12954 /* We have patterns that allow zero sets of memory, for instance.
12955 In 64-bit mode, we should probably support all 8-byte vectors,
12956 since we can in fact encode that into an immediate. */
12957 if (GET_CODE (x
) == CONST_VECTOR
)
12959 if (x
!= CONST0_RTX (GET_MODE (x
)))
12960 output_operand_lossage ("invalid vector immediate");
12964 if (code
!= 'P' && code
!= 'p')
12966 if (CONST_INT_P (x
))
12968 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12971 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
12972 || GET_CODE (x
) == LABEL_REF
)
12974 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12977 fputs ("OFFSET FLAT:", file
);
12980 if (CONST_INT_P (x
))
12981 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
12982 else if (flag_pic
|| MACHOPIC_INDIRECT
)
12983 output_pic_addr_const (file
, x
, code
);
12985 output_addr_const (file
, x
);
12990 ix86_print_operand_punct_valid_p (unsigned char code
)
12992 return (code
== '*' || code
== '+' || code
== '&' || code
== ';'
12993 || code
== '~' || code
== '^' || code
== '!');
12996 /* Print a memory operand whose address is ADDR. */
12999 ix86_print_operand_address_as (FILE *file
, rtx addr
,
13000 addr_space_t as
, bool no_rip
)
13002 struct ix86_address parts
;
13003 rtx base
, index
, disp
;
13009 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
13011 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
13012 gcc_assert (parts
.index
== NULL_RTX
);
13013 parts
.index
= XVECEXP (addr
, 0, 1);
13014 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
13015 addr
= XVECEXP (addr
, 0, 0);
13018 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
13020 gcc_assert (TARGET_64BIT
);
13021 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
13025 ok
= ix86_decompose_address (addr
, &parts
);
13030 index
= parts
.index
;
13032 scale
= parts
.scale
;
13034 if (ADDR_SPACE_GENERIC_P (as
))
13037 gcc_assert (ADDR_SPACE_GENERIC_P (parts
.seg
));
13039 if (!ADDR_SPACE_GENERIC_P (as
))
13041 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13046 case ADDR_SPACE_SEG_FS
:
13047 fputs ("fs:", file
);
13049 case ADDR_SPACE_SEG_GS
:
13050 fputs ("gs:", file
);
13053 gcc_unreachable ();
13057 /* Use one byte shorter RIP relative addressing for 64bit mode. */
13058 if (TARGET_64BIT
&& !base
&& !index
&& !no_rip
)
13062 if (GET_CODE (disp
) == CONST
13063 && GET_CODE (XEXP (disp
, 0)) == PLUS
13064 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
13065 symbol
= XEXP (XEXP (disp
, 0), 0);
13067 if (GET_CODE (symbol
) == LABEL_REF
13068 || (GET_CODE (symbol
) == SYMBOL_REF
13069 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
13073 if (!base
&& !index
)
13075 /* Displacement only requires special attention. */
13076 if (CONST_INT_P (disp
))
13078 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& ADDR_SPACE_GENERIC_P (as
))
13079 fputs ("ds:", file
);
13080 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
13082 /* Load the external function address via the GOT slot to avoid PLT. */
13083 else if (GET_CODE (disp
) == CONST
13084 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
13085 && (XINT (XEXP (disp
, 0), 1) == UNSPEC_GOTPCREL
13086 || XINT (XEXP (disp
, 0), 1) == UNSPEC_GOT
)
13087 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp
, 0), 0, 0)))
13088 output_pic_addr_const (file
, disp
, 0);
13090 output_pic_addr_const (file
, disp
, 0);
13092 output_addr_const (file
, disp
);
13096 /* Print SImode register names to force addr32 prefix. */
13097 if (SImode_address_operand (addr
, VOIDmode
))
13101 gcc_assert (TARGET_64BIT
);
13102 switch (GET_CODE (addr
))
13105 gcc_assert (GET_MODE (addr
) == SImode
);
13106 gcc_assert (GET_MODE (SUBREG_REG (addr
)) == DImode
);
13110 gcc_assert (GET_MODE (addr
) == DImode
);
13113 gcc_unreachable ();
13116 gcc_assert (!code
);
13122 && CONST_INT_P (disp
)
13123 && INTVAL (disp
) < -16*1024*1024)
13125 /* X32 runs in 64-bit mode, where displacement, DISP, in
13126 address DISP(%r64), is encoded as 32-bit immediate sign-
13127 extended from 32-bit to 64-bit. For -0x40000300(%r64),
13128 address is %r64 + 0xffffffffbffffd00. When %r64 <
13129 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
13130 which is invalid for x32. The correct address is %r64
13131 - 0x40000300 == 0xf7ffdd64. To properly encode
13132 -0x40000300(%r64) for x32, we zero-extend negative
13133 displacement by forcing addr32 prefix which truncates
13134 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
13135 zero-extend all negative displacements, including -1(%rsp).
13136 However, for small negative displacements, sign-extension
13137 won't cause overflow. We only zero-extend negative
13138 displacements if they < -16*1024*1024, which is also used
13139 to check legitimate address displacements for PIC. */
13143 /* Since the upper 32 bits of RSP are always zero for x32,
13144 we can encode %esp as %rsp to avoid 0x67 prefix if
13145 there is no index register. */
13146 if (TARGET_X32
&& Pmode
== SImode
13147 && !index
&& base
&& REG_P (base
) && REGNO (base
) == SP_REG
)
13150 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13155 output_pic_addr_const (file
, disp
, 0);
13156 else if (GET_CODE (disp
) == LABEL_REF
)
13157 output_asm_label (disp
);
13159 output_addr_const (file
, disp
);
13164 print_reg (base
, code
, file
);
13168 print_reg (index
, vsib
? 0 : code
, file
);
13169 if (scale
!= 1 || vsib
)
13170 fprintf (file
, ",%d", scale
);
13176 rtx offset
= NULL_RTX
;
13180 /* Pull out the offset of a symbol; print any symbol itself. */
13181 if (GET_CODE (disp
) == CONST
13182 && GET_CODE (XEXP (disp
, 0)) == PLUS
13183 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
13185 offset
= XEXP (XEXP (disp
, 0), 1);
13186 disp
= gen_rtx_CONST (VOIDmode
,
13187 XEXP (XEXP (disp
, 0), 0));
13191 output_pic_addr_const (file
, disp
, 0);
13192 else if (GET_CODE (disp
) == LABEL_REF
)
13193 output_asm_label (disp
);
13194 else if (CONST_INT_P (disp
))
13197 output_addr_const (file
, disp
);
13203 print_reg (base
, code
, file
);
13206 if (INTVAL (offset
) >= 0)
13208 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
13212 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
13219 print_reg (index
, vsib
? 0 : code
, file
);
13220 if (scale
!= 1 || vsib
)
13221 fprintf (file
, "*%d", scale
);
13229 ix86_print_operand_address (FILE *file
, machine_mode
/*mode*/, rtx addr
)
13231 ix86_print_operand_address_as (file
, addr
, ADDR_SPACE_GENERIC
, false);
13234 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13237 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
13241 if (GET_CODE (x
) != UNSPEC
)
13244 op
= XVECEXP (x
, 0, 0);
13245 switch (XINT (x
, 1))
13247 case UNSPEC_GOTOFF
:
13248 output_addr_const (file
, op
);
13249 fputs ("@gotoff", file
);
13251 case UNSPEC_GOTTPOFF
:
13252 output_addr_const (file
, op
);
13253 /* FIXME: This might be @TPOFF in Sun ld. */
13254 fputs ("@gottpoff", file
);
13257 output_addr_const (file
, op
);
13258 fputs ("@tpoff", file
);
13260 case UNSPEC_NTPOFF
:
13261 output_addr_const (file
, op
);
13263 fputs ("@tpoff", file
);
13265 fputs ("@ntpoff", file
);
13267 case UNSPEC_DTPOFF
:
13268 output_addr_const (file
, op
);
13269 fputs ("@dtpoff", file
);
13271 case UNSPEC_GOTNTPOFF
:
13272 output_addr_const (file
, op
);
13274 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13275 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
13277 fputs ("@gotntpoff", file
);
13279 case UNSPEC_INDNTPOFF
:
13280 output_addr_const (file
, op
);
13281 fputs ("@indntpoff", file
);
13284 case UNSPEC_MACHOPIC_OFFSET
:
13285 output_addr_const (file
, op
);
13287 machopic_output_function_base_name (file
);
13299 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13300 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13301 is the expression of the binary operation. The output may either be
13302 emitted here, or returned to the caller, like all output_* functions.
13304 There is no guarantee that the operands are the same mode, as they
13305 might be within FLOAT or FLOAT_EXTEND expressions. */
13307 #ifndef SYSV386_COMPAT
13308 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13309 wants to fix the assemblers because that causes incompatibility
13310 with gcc. No-one wants to fix gcc because that causes
13311 incompatibility with assemblers... You can use the option of
13312 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13313 #define SYSV386_COMPAT 1
13317 output_387_binary_op (rtx_insn
*insn
, rtx
*operands
)
13319 static char buf
[40];
13322 = (SSE_REG_P (operands
[0])
13323 || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]));
13327 else if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
13328 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
13335 switch (GET_CODE (operands
[3]))
13346 gcc_unreachable ();
13353 p
= (GET_MODE (operands
[0]) == SFmode
) ? "ss" : "sd";
13357 p
= "\t{%2, %1, %0|%0, %1, %2}";
13359 p
= "\t{%2, %0|%0, %2}";
13365 /* Even if we do not want to check the inputs, this documents input
13366 constraints. Which helps in understanding the following code. */
13369 if (STACK_REG_P (operands
[0])
13370 && ((REG_P (operands
[1])
13371 && REGNO (operands
[0]) == REGNO (operands
[1])
13372 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
13373 || (REG_P (operands
[2])
13374 && REGNO (operands
[0]) == REGNO (operands
[2])
13375 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
13376 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
13379 gcc_unreachable ();
13382 switch (GET_CODE (operands
[3]))
13386 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
13387 std::swap (operands
[1], operands
[2]);
13389 /* know operands[0] == operands[1]. */
13391 if (MEM_P (operands
[2]))
13397 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
13399 if (STACK_TOP_P (operands
[0]))
13400 /* How is it that we are storing to a dead operand[2]?
13401 Well, presumably operands[1] is dead too. We can't
13402 store the result to st(0) as st(0) gets popped on this
13403 instruction. Instead store to operands[2] (which I
13404 think has to be st(1)). st(1) will be popped later.
13405 gcc <= 2.8.1 didn't have this check and generated
13406 assembly code that the Unixware assembler rejected. */
13407 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13409 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13413 if (STACK_TOP_P (operands
[0]))
13414 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13416 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13421 if (MEM_P (operands
[1]))
13427 if (MEM_P (operands
[2]))
13433 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
13436 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13437 derived assemblers, confusingly reverse the direction of
13438 the operation for fsub{r} and fdiv{r} when the
13439 destination register is not st(0). The Intel assembler
13440 doesn't have this brain damage. Read !SYSV386_COMPAT to
13441 figure out what the hardware really does. */
13442 if (STACK_TOP_P (operands
[0]))
13443 p
= "{p\t%0, %2|rp\t%2, %0}";
13445 p
= "{rp\t%2, %0|p\t%0, %2}";
13447 if (STACK_TOP_P (operands
[0]))
13448 /* As above for fmul/fadd, we can't store to st(0). */
13449 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13451 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13456 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
13459 if (STACK_TOP_P (operands
[0]))
13460 p
= "{rp\t%0, %1|p\t%1, %0}";
13462 p
= "{p\t%1, %0|rp\t%0, %1}";
13464 if (STACK_TOP_P (operands
[0]))
13465 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13467 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13472 if (STACK_TOP_P (operands
[0]))
13474 if (STACK_TOP_P (operands
[1]))
13475 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13477 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13480 else if (STACK_TOP_P (operands
[1]))
13483 p
= "{\t%1, %0|r\t%0, %1}";
13485 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13491 p
= "{r\t%2, %0|\t%0, %2}";
13493 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13499 gcc_unreachable ();
13506 /* Return needed mode for entity in optimize_mode_switching pass. */
13509 ix86_dirflag_mode_needed (rtx_insn
*insn
)
13513 if (cfun
->machine
->func_type
== TYPE_NORMAL
)
13514 return X86_DIRFLAG_ANY
;
13516 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
13517 return TARGET_CLD
? X86_DIRFLAG_ANY
: X86_DIRFLAG_RESET
;
13520 if (recog_memoized (insn
) < 0)
13521 return X86_DIRFLAG_ANY
;
13523 if (get_attr_type (insn
) == TYPE_STR
)
13525 /* Emit cld instruction if stringops are used in the function. */
13526 if (cfun
->machine
->func_type
== TYPE_NORMAL
)
13527 return TARGET_CLD
? X86_DIRFLAG_RESET
: X86_DIRFLAG_ANY
;
13529 return X86_DIRFLAG_RESET
;
13532 return X86_DIRFLAG_ANY
;
13535 /* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */
13538 ix86_check_avx_upper_register (const_rtx exp
)
13540 return SSE_REG_P (exp
) && GET_MODE_BITSIZE (GET_MODE (exp
)) > 128;
13543 /* Return needed mode for entity in optimize_mode_switching pass. */
13546 ix86_avx_u128_mode_needed (rtx_insn
*insn
)
13552 /* Needed mode is set to AVX_U128_CLEAN if there are
13553 no 256bit or 512bit modes used in function arguments. */
13554 for (link
= CALL_INSN_FUNCTION_USAGE (insn
);
13556 link
= XEXP (link
, 1))
13558 if (GET_CODE (XEXP (link
, 0)) == USE
)
13560 rtx arg
= XEXP (XEXP (link
, 0), 0);
13562 if (ix86_check_avx_upper_register (arg
))
13563 return AVX_U128_DIRTY
;
13567 return AVX_U128_CLEAN
;
13570 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
13571 Hardware changes state only when a 256bit register is written to,
13572 but we need to prevent the compiler from moving optimal insertion
13573 point above eventual read from 256bit or 512 bit register. */
13574 subrtx_iterator::array_type array
;
13575 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), NONCONST
)
13576 if (ix86_check_avx_upper_register (*iter
))
13577 return AVX_U128_DIRTY
;
13579 return AVX_U128_ANY
;
13582 /* Return mode that i387 must be switched into
13583 prior to the execution of insn. */
13586 ix86_i387_mode_needed (int entity
, rtx_insn
*insn
)
13588 enum attr_i387_cw mode
;
13590 /* The mode UNINITIALIZED is used to store control word after a
13591 function call or ASM pattern. The mode ANY specify that function
13592 has no requirements on the control word and make no changes in the
13593 bits we are interested in. */
13596 || (NONJUMP_INSN_P (insn
)
13597 && (asm_noperands (PATTERN (insn
)) >= 0
13598 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
13599 return I387_CW_UNINITIALIZED
;
13601 if (recog_memoized (insn
) < 0)
13602 return I387_CW_ANY
;
13604 mode
= get_attr_i387_cw (insn
);
13609 if (mode
== I387_CW_TRUNC
)
13614 if (mode
== I387_CW_FLOOR
)
13619 if (mode
== I387_CW_CEIL
)
13624 gcc_unreachable ();
13627 return I387_CW_ANY
;
13630 /* Return mode that entity must be switched into
13631 prior to the execution of insn. */
13634 ix86_mode_needed (int entity
, rtx_insn
*insn
)
13639 return ix86_dirflag_mode_needed (insn
);
13641 return ix86_avx_u128_mode_needed (insn
);
13645 return ix86_i387_mode_needed (entity
, insn
);
13647 gcc_unreachable ();
13652 /* Check if a 256bit or 512bit AVX register is referenced in stores. */
13655 ix86_check_avx_upper_stores (rtx dest
, const_rtx
, void *data
)
13657 if (ix86_check_avx_upper_register (dest
))
13659 bool *used
= (bool *) data
;
13664 /* Calculate mode of upper 128bit AVX registers after the insn. */
13667 ix86_avx_u128_mode_after (int mode
, rtx_insn
*insn
)
13669 rtx pat
= PATTERN (insn
);
13671 if (vzeroupper_pattern (pat
, VOIDmode
)
13672 || vzeroall_pattern (pat
, VOIDmode
))
13673 return AVX_U128_CLEAN
;
13675 /* We know that state is clean after CALL insn if there are no
13676 256bit or 512bit registers used in the function return register. */
13679 bool avx_upper_reg_found
= false;
13680 note_stores (pat
, ix86_check_avx_upper_stores
, &avx_upper_reg_found
);
13682 return avx_upper_reg_found
? AVX_U128_DIRTY
: AVX_U128_CLEAN
;
13685 /* Otherwise, return current mode. Remember that if insn
13686 references AVX 256bit or 512bit registers, the mode was already
13687 changed to DIRTY from MODE_NEEDED. */
13691 /* Return the mode that an insn results in. */
13694 ix86_mode_after (int entity
, int mode
, rtx_insn
*insn
)
13701 return ix86_avx_u128_mode_after (mode
, insn
);
13707 gcc_unreachable ();
13712 ix86_dirflag_mode_entry (void)
13714 /* For TARGET_CLD or in the interrupt handler we can't assume
13715 direction flag state at function entry. */
13717 || cfun
->machine
->func_type
!= TYPE_NORMAL
)
13718 return X86_DIRFLAG_ANY
;
13720 return X86_DIRFLAG_RESET
;
13724 ix86_avx_u128_mode_entry (void)
13728 /* Entry mode is set to AVX_U128_DIRTY if there are
13729 256bit or 512bit modes used in function arguments. */
13730 for (arg
= DECL_ARGUMENTS (current_function_decl
); arg
;
13731 arg
= TREE_CHAIN (arg
))
13733 rtx incoming
= DECL_INCOMING_RTL (arg
);
13735 if (incoming
&& ix86_check_avx_upper_register (incoming
))
13736 return AVX_U128_DIRTY
;
13739 return AVX_U128_CLEAN
;
13742 /* Return a mode that ENTITY is assumed to be
13743 switched to at function entry. */
13746 ix86_mode_entry (int entity
)
13751 return ix86_dirflag_mode_entry ();
13753 return ix86_avx_u128_mode_entry ();
13757 return I387_CW_ANY
;
13759 gcc_unreachable ();
13764 ix86_avx_u128_mode_exit (void)
13766 rtx reg
= crtl
->return_rtx
;
13768 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
13769 or 512 bit modes used in the function return register. */
13770 if (reg
&& ix86_check_avx_upper_register (reg
))
13771 return AVX_U128_DIRTY
;
13773 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
13774 modes used in function arguments, otherwise return AVX_U128_CLEAN.
13776 return ix86_avx_u128_mode_entry ();
13779 /* Return a mode that ENTITY is assumed to be
13780 switched to at function exit. */
13783 ix86_mode_exit (int entity
)
13788 return X86_DIRFLAG_ANY
;
13790 return ix86_avx_u128_mode_exit ();
13794 return I387_CW_ANY
;
13796 gcc_unreachable ();
13801 ix86_mode_priority (int, int n
)
13806 /* Output code to initialize control word copies used by trunc?f?i and
13807 rounding patterns. CURRENT_MODE is set to current control word,
13808 while NEW_MODE is set to new control word. */
13811 emit_i387_cw_initialization (int mode
)
13813 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
13816 enum ix86_stack_slot slot
;
13818 rtx reg
= gen_reg_rtx (HImode
);
13820 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
13821 emit_move_insn (reg
, copy_rtx (stored_mode
));
13825 case I387_CW_TRUNC
:
13826 /* round toward zero (truncate) */
13827 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
13828 slot
= SLOT_CW_TRUNC
;
13831 case I387_CW_FLOOR
:
13832 /* round down toward -oo */
13833 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
13834 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
13835 slot
= SLOT_CW_FLOOR
;
13839 /* round up toward +oo */
13840 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
13841 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
13842 slot
= SLOT_CW_CEIL
;
13846 gcc_unreachable ();
13849 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
13851 new_mode
= assign_386_stack_local (HImode
, slot
);
13852 emit_move_insn (new_mode
, reg
);
13855 /* Generate one or more insns to set ENTITY to MODE. */
13858 ix86_emit_mode_set (int entity
, int mode
, int prev_mode ATTRIBUTE_UNUSED
,
13859 HARD_REG_SET regs_live ATTRIBUTE_UNUSED
)
13864 if (mode
== X86_DIRFLAG_RESET
)
13865 emit_insn (gen_cld ());
13868 if (mode
== AVX_U128_CLEAN
)
13869 emit_insn (gen_avx_vzeroupper ());
13874 if (mode
!= I387_CW_ANY
13875 && mode
!= I387_CW_UNINITIALIZED
)
13876 emit_i387_cw_initialization (mode
);
13879 gcc_unreachable ();
13883 /* Output code for INSN to convert a float to a signed int. OPERANDS
13884 are the insn operands. The output may be [HSD]Imode and the input
13885 operand may be [SDX]Fmode. */
13888 output_fix_trunc (rtx_insn
*insn
, rtx
*operands
, bool fisttp
)
13890 bool stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
);
13891 bool dimode_p
= GET_MODE (operands
[0]) == DImode
;
13892 int round_mode
= get_attr_i387_cw (insn
);
13894 static char buf
[40];
13897 /* Jump through a hoop or two for DImode, since the hardware has no
13898 non-popping instruction. We used to do this a different way, but
13899 that was somewhat fragile and broke with post-reload splitters. */
13900 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
13901 output_asm_insn ("fld\t%y1", operands
);
13903 gcc_assert (STACK_TOP_P (operands
[1]));
13904 gcc_assert (MEM_P (operands
[0]));
13905 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
13908 return "fisttp%Z0\t%0";
13910 strcpy (buf
, "fist");
13912 if (round_mode
!= I387_CW_ANY
)
13913 output_asm_insn ("fldcw\t%3", operands
);
13916 strcat (buf
, p
+ !(stack_top_dies
|| dimode_p
));
13918 output_asm_insn (buf
, operands
);
13920 if (round_mode
!= I387_CW_ANY
)
13921 output_asm_insn ("fldcw\t%2", operands
);
13926 /* Output code for x87 ffreep insn. The OPNO argument, which may only
13927 have the values zero or one, indicates the ffreep insn's operand
13928 from the OPERANDS array. */
13930 static const char *
13931 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
13933 if (TARGET_USE_FFREEP
)
13934 #ifdef HAVE_AS_IX86_FFREEP
13935 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
13938 static char retval
[32];
13939 int regno
= REGNO (operands
[opno
]);
13941 gcc_assert (STACK_REGNO_P (regno
));
13943 regno
-= FIRST_STACK_REG
;
13945 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
13950 return opno
? "fstp\t%y1" : "fstp\t%y0";
13954 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
13955 should be used. UNORDERED_P is true when fucom should be used. */
13958 output_fp_compare (rtx_insn
*insn
, rtx
*operands
,
13959 bool eflags_p
, bool unordered_p
)
13961 rtx
*xops
= eflags_p
? &operands
[0] : &operands
[1];
13962 bool stack_top_dies
;
13964 static char buf
[40];
13967 gcc_assert (STACK_TOP_P (xops
[0]));
13969 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
);
13973 p
= unordered_p
? "fucomi" : "fcomi";
13976 p
= "p\t{%y1, %0|%0, %y1}";
13977 strcat (buf
, p
+ !stack_top_dies
);
13982 if (STACK_REG_P (xops
[1])
13984 && find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
+ 1))
13986 gcc_assert (REGNO (xops
[1]) == FIRST_STACK_REG
+ 1);
13988 /* If both the top of the 387 stack die, and the other operand
13989 is also a stack register that dies, then this must be a
13990 `fcompp' float compare. */
13991 p
= unordered_p
? "fucompp" : "fcompp";
13994 else if (const0_operand (xops
[1], VOIDmode
))
13996 gcc_assert (!unordered_p
);
13997 strcpy (buf
, "ftst");
14001 if (GET_MODE_CLASS (GET_MODE (xops
[1])) == MODE_INT
)
14003 gcc_assert (!unordered_p
);
14007 p
= unordered_p
? "fucom" : "fcom";
14012 strcat (buf
, p
+ !stack_top_dies
);
14015 output_asm_insn (buf
, operands
);
14016 return "fnstsw\t%0";
14020 ix86_output_addr_vec_elt (FILE *file
, int value
)
14022 const char *directive
= ASM_LONG
;
14026 directive
= ASM_QUAD
;
14028 gcc_assert (!TARGET_64BIT
);
14031 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
14035 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
14037 const char *directive
= ASM_LONG
;
14040 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
14041 directive
= ASM_QUAD
;
14043 gcc_assert (!TARGET_64BIT
);
14045 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14046 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
14047 fprintf (file
, "%s%s%d-%s%d\n",
14048 directive
, LPREFIX
, value
, LPREFIX
, rel
);
14050 else if (TARGET_MACHO
)
14052 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
14053 machopic_output_function_base_name (file
);
14057 else if (HAVE_AS_GOTOFF_IN_DATA
)
14058 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
14060 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
14061 GOT_SYMBOL_NAME
, LPREFIX
, value
);
14064 #define LEA_MAX_STALL (3)
14065 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
14067 /* Increase given DISTANCE in half-cycles according to
14068 dependencies between PREV and NEXT instructions.
14069 Add 1 half-cycle if there is no dependency and
14070 go to next cycle if there is some dependecy. */
14072 static unsigned int
14073 increase_distance (rtx_insn
*prev
, rtx_insn
*next
, unsigned int distance
)
14077 if (!prev
|| !next
)
14078 return distance
+ (distance
& 1) + 2;
14080 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
14081 return distance
+ 1;
14083 FOR_EACH_INSN_USE (use
, next
)
14084 FOR_EACH_INSN_DEF (def
, prev
)
14085 if (!DF_REF_IS_ARTIFICIAL (def
)
14086 && DF_REF_REGNO (use
) == DF_REF_REGNO (def
))
14087 return distance
+ (distance
& 1) + 2;
14089 return distance
+ 1;
14092 /* Function checks if instruction INSN defines register number
14093 REGNO1 or REGNO2. */
14096 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
14101 FOR_EACH_INSN_DEF (def
, insn
)
14102 if (DF_REF_REG_DEF_P (def
)
14103 && !DF_REF_IS_ARTIFICIAL (def
)
14104 && (regno1
== DF_REF_REGNO (def
)
14105 || regno2
== DF_REF_REGNO (def
)))
14111 /* Function checks if instruction INSN uses register number
14112 REGNO as a part of address expression. */
14115 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
14119 FOR_EACH_INSN_USE (use
, insn
)
14120 if (DF_REF_REG_MEM_P (use
) && regno
== DF_REF_REGNO (use
))
14126 /* Search backward for non-agu definition of register number REGNO1
14127 or register number REGNO2 in basic block starting from instruction
14128 START up to head of basic block or instruction INSN.
14130 Function puts true value into *FOUND var if definition was found
14131 and false otherwise.
14133 Distance in half-cycles between START and found instruction or head
14134 of BB is added to DISTANCE and returned. */
14137 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
14138 rtx_insn
*insn
, int distance
,
14139 rtx_insn
*start
, bool *found
)
14141 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
14142 rtx_insn
*prev
= start
;
14143 rtx_insn
*next
= NULL
;
14149 && distance
< LEA_SEARCH_THRESHOLD
)
14151 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
14153 distance
= increase_distance (prev
, next
, distance
);
14154 if (insn_defines_reg (regno1
, regno2
, prev
))
14156 if (recog_memoized (prev
) < 0
14157 || get_attr_type (prev
) != TYPE_LEA
)
14166 if (prev
== BB_HEAD (bb
))
14169 prev
= PREV_INSN (prev
);
14175 /* Search backward for non-agu definition of register number REGNO1
14176 or register number REGNO2 in INSN's basic block until
14177 1. Pass LEA_SEARCH_THRESHOLD instructions, or
14178 2. Reach neighbor BBs boundary, or
14179 3. Reach agu definition.
14180 Returns the distance between the non-agu definition point and INSN.
14181 If no definition point, returns -1. */
14184 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
14187 basic_block bb
= BLOCK_FOR_INSN (insn
);
14189 bool found
= false;
14191 if (insn
!= BB_HEAD (bb
))
14192 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
14193 distance
, PREV_INSN (insn
),
14196 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
14200 bool simple_loop
= false;
14202 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
14205 simple_loop
= true;
14210 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
14212 BB_END (bb
), &found
);
14215 int shortest_dist
= -1;
14216 bool found_in_bb
= false;
14218 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
14221 = distance_non_agu_define_in_bb (regno1
, regno2
,
14227 if (shortest_dist
< 0)
14228 shortest_dist
= bb_dist
;
14229 else if (bb_dist
> 0)
14230 shortest_dist
= MIN (bb_dist
, shortest_dist
);
14236 distance
= shortest_dist
;
14240 /* get_attr_type may modify recog data. We want to make sure
14241 that recog data is valid for instruction INSN, on which
14242 distance_non_agu_define is called. INSN is unchanged here. */
14243 extract_insn_cached (insn
);
14248 return distance
>> 1;
14251 /* Return the distance in half-cycles between INSN and the next
14252 insn that uses register number REGNO in memory address added
14253 to DISTANCE. Return -1 if REGNO0 is set.
14255 Put true value into *FOUND if register usage was found and
14257 Put true value into *REDEFINED if register redefinition was
14258 found and false otherwise. */
14261 distance_agu_use_in_bb (unsigned int regno
,
14262 rtx_insn
*insn
, int distance
, rtx_insn
*start
,
14263 bool *found
, bool *redefined
)
14265 basic_block bb
= NULL
;
14266 rtx_insn
*next
= start
;
14267 rtx_insn
*prev
= NULL
;
14270 *redefined
= false;
14272 if (start
!= NULL_RTX
)
14274 bb
= BLOCK_FOR_INSN (start
);
14275 if (start
!= BB_HEAD (bb
))
14276 /* If insn and start belong to the same bb, set prev to insn,
14277 so the call to increase_distance will increase the distance
14278 between insns by 1. */
14284 && distance
< LEA_SEARCH_THRESHOLD
)
14286 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
14288 distance
= increase_distance(prev
, next
, distance
);
14289 if (insn_uses_reg_mem (regno
, next
))
14291 /* Return DISTANCE if OP0 is used in memory
14292 address in NEXT. */
14297 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
14299 /* Return -1 if OP0 is set in NEXT. */
14307 if (next
== BB_END (bb
))
14310 next
= NEXT_INSN (next
);
14316 /* Return the distance between INSN and the next insn that uses
14317 register number REGNO0 in memory address. Return -1 if no such
14318 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
14321 distance_agu_use (unsigned int regno0
, rtx_insn
*insn
)
14323 basic_block bb
= BLOCK_FOR_INSN (insn
);
14325 bool found
= false;
14326 bool redefined
= false;
14328 if (insn
!= BB_END (bb
))
14329 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
14331 &found
, &redefined
);
14333 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
14337 bool simple_loop
= false;
14339 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
14342 simple_loop
= true;
14347 distance
= distance_agu_use_in_bb (regno0
, insn
,
14348 distance
, BB_HEAD (bb
),
14349 &found
, &redefined
);
14352 int shortest_dist
= -1;
14353 bool found_in_bb
= false;
14354 bool redefined_in_bb
= false;
14356 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
14359 = distance_agu_use_in_bb (regno0
, insn
,
14360 distance
, BB_HEAD (e
->dest
),
14361 &found_in_bb
, &redefined_in_bb
);
14364 if (shortest_dist
< 0)
14365 shortest_dist
= bb_dist
;
14366 else if (bb_dist
> 0)
14367 shortest_dist
= MIN (bb_dist
, shortest_dist
);
14373 distance
= shortest_dist
;
14377 if (!found
|| redefined
)
14380 return distance
>> 1;
14383 /* Define this macro to tune LEA priority vs ADD, it take effect when
14384 there is a dilemma of choicing LEA or ADD
14385 Negative value: ADD is more preferred than LEA
14387 Positive value: LEA is more preferred than ADD*/
14388 #define IX86_LEA_PRIORITY 0
14390 /* Return true if usage of lea INSN has performance advantage
14391 over a sequence of instructions. Instructions sequence has
14392 SPLIT_COST cycles higher latency than lea latency. */
14395 ix86_lea_outperforms (rtx_insn
*insn
, unsigned int regno0
, unsigned int regno1
,
14396 unsigned int regno2
, int split_cost
, bool has_scale
)
14398 int dist_define
, dist_use
;
14400 /* For Silvermont if using a 2-source or 3-source LEA for
14401 non-destructive destination purposes, or due to wanting
14402 ability to use SCALE, the use of LEA is justified. */
14403 if (TARGET_SILVERMONT
|| TARGET_GOLDMONT
|| TARGET_GOLDMONT_PLUS
14404 || TARGET_TREMONT
|| TARGET_INTEL
)
14408 if (split_cost
< 1)
14410 if (regno0
== regno1
|| regno0
== regno2
)
14415 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
14416 dist_use
= distance_agu_use (regno0
, insn
);
14418 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
14420 /* If there is no non AGU operand definition, no AGU
14421 operand usage and split cost is 0 then both lea
14422 and non lea variants have same priority. Currently
14423 we prefer lea for 64 bit code and non lea on 32 bit
14425 if (dist_use
< 0 && split_cost
== 0)
14426 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
14431 /* With longer definitions distance lea is more preferable.
14432 Here we change it to take into account splitting cost and
14434 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
14436 /* If there is no use in memory addess then we just check
14437 that split cost exceeds AGU stall. */
14439 return dist_define
> LEA_MAX_STALL
;
14441 /* If this insn has both backward non-agu dependence and forward
14442 agu dependence, the one with short distance takes effect. */
14443 return dist_define
>= dist_use
;
14446 /* Return true if it is legal to clobber flags by INSN and
14447 false otherwise. */
14450 ix86_ok_to_clobber_flags (rtx_insn
*insn
)
14452 basic_block bb
= BLOCK_FOR_INSN (insn
);
14458 if (NONDEBUG_INSN_P (insn
))
14460 FOR_EACH_INSN_USE (use
, insn
)
14461 if (DF_REF_REG_USE_P (use
) && DF_REF_REGNO (use
) == FLAGS_REG
)
14464 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
14468 if (insn
== BB_END (bb
))
14471 insn
= NEXT_INSN (insn
);
14474 live
= df_get_live_out(bb
);
14475 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
14478 /* Return true if we need to split op0 = op1 + op2 into a sequence of
14479 move and add to avoid AGU stalls. */
14482 ix86_avoid_lea_for_add (rtx_insn
*insn
, rtx operands
[])
14484 unsigned int regno0
, regno1
, regno2
;
14486 /* Check if we need to optimize. */
14487 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
14490 /* Check it is correct to split here. */
14491 if (!ix86_ok_to_clobber_flags(insn
))
14494 regno0
= true_regnum (operands
[0]);
14495 regno1
= true_regnum (operands
[1]);
14496 regno2
= true_regnum (operands
[2]);
14498 /* We need to split only adds with non destructive
14499 destination operand. */
14500 if (regno0
== regno1
|| regno0
== regno2
)
14503 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1, false);
14506 /* Return true if we should emit lea instruction instead of mov
14510 ix86_use_lea_for_mov (rtx_insn
*insn
, rtx operands
[])
14512 unsigned int regno0
, regno1
;
14514 /* Check if we need to optimize. */
14515 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
14518 /* Use lea for reg to reg moves only. */
14519 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
14522 regno0
= true_regnum (operands
[0]);
14523 regno1
= true_regnum (operands
[1]);
14525 return ix86_lea_outperforms (insn
, regno0
, regno1
, INVALID_REGNUM
, 0, false);
14528 /* Return true if we need to split lea into a sequence of
14529 instructions to avoid AGU stalls. */
14532 ix86_avoid_lea_for_addr (rtx_insn
*insn
, rtx operands
[])
14534 unsigned int regno0
, regno1
, regno2
;
14536 struct ix86_address parts
;
14539 /* Check we need to optimize. */
14540 if (!TARGET_AVOID_LEA_FOR_ADDR
|| optimize_function_for_size_p (cfun
))
14543 /* The "at least two components" test below might not catch simple
14544 move or zero extension insns if parts.base is non-NULL and parts.disp
14545 is const0_rtx as the only components in the address, e.g. if the
14546 register is %rbp or %r13. As this test is much cheaper and moves or
14547 zero extensions are the common case, do this check first. */
14548 if (REG_P (operands
[1])
14549 || (SImode_address_operand (operands
[1], VOIDmode
)
14550 && REG_P (XEXP (operands
[1], 0))))
14553 /* Check if it is OK to split here. */
14554 if (!ix86_ok_to_clobber_flags (insn
))
14557 ok
= ix86_decompose_address (operands
[1], &parts
);
14560 /* There should be at least two components in the address. */
14561 if ((parts
.base
!= NULL_RTX
) + (parts
.index
!= NULL_RTX
)
14562 + (parts
.disp
!= NULL_RTX
) + (parts
.scale
> 1) < 2)
14565 /* We should not split into add if non legitimate pic
14566 operand is used as displacement. */
14567 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
14570 regno0
= true_regnum (operands
[0]) ;
14571 regno1
= INVALID_REGNUM
;
14572 regno2
= INVALID_REGNUM
;
14575 regno1
= true_regnum (parts
.base
);
14577 regno2
= true_regnum (parts
.index
);
14581 /* Compute how many cycles we will add to execution time
14582 if split lea into a sequence of instructions. */
14583 if (parts
.base
|| parts
.index
)
14585 /* Have to use mov instruction if non desctructive
14586 destination form is used. */
14587 if (regno1
!= regno0
&& regno2
!= regno0
)
14590 /* Have to add index to base if both exist. */
14591 if (parts
.base
&& parts
.index
)
14594 /* Have to use shift and adds if scale is 2 or greater. */
14595 if (parts
.scale
> 1)
14597 if (regno0
!= regno1
)
14599 else if (regno2
== regno0
)
14602 split_cost
+= parts
.scale
;
14605 /* Have to use add instruction with immediate if
14606 disp is non zero. */
14607 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
14610 /* Subtract the price of lea. */
14614 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
,
14618 /* Return true if it is ok to optimize an ADD operation to LEA
14619 operation to avoid flag register consumation. For most processors,
14620 ADD is faster than LEA. For the processors like BONNELL, if the
14621 destination register of LEA holds an actual address which will be
14622 used soon, LEA is better and otherwise ADD is better. */
14625 ix86_lea_for_add_ok (rtx_insn
*insn
, rtx operands
[])
14627 unsigned int regno0
= true_regnum (operands
[0]);
14628 unsigned int regno1
= true_regnum (operands
[1]);
14629 unsigned int regno2
= true_regnum (operands
[2]);
14631 /* If a = b + c, (a!=b && a!=c), must use lea form. */
14632 if (regno0
!= regno1
&& regno0
!= regno2
)
14635 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
14638 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0, false);
14641 /* Return true if destination reg of SET_BODY is shift count of
14645 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
14651 /* Retrieve destination of SET_BODY. */
14652 switch (GET_CODE (set_body
))
14655 set_dest
= SET_DEST (set_body
);
14656 if (!set_dest
|| !REG_P (set_dest
))
14660 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
14661 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
14669 /* Retrieve shift count of USE_BODY. */
14670 switch (GET_CODE (use_body
))
14673 shift_rtx
= XEXP (use_body
, 1);
14676 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
14677 if (ix86_dep_by_shift_count_body (set_body
,
14678 XVECEXP (use_body
, 0, i
)))
14686 && (GET_CODE (shift_rtx
) == ASHIFT
14687 || GET_CODE (shift_rtx
) == LSHIFTRT
14688 || GET_CODE (shift_rtx
) == ASHIFTRT
14689 || GET_CODE (shift_rtx
) == ROTATE
14690 || GET_CODE (shift_rtx
) == ROTATERT
))
14692 rtx shift_count
= XEXP (shift_rtx
, 1);
14694 /* Return true if shift count is dest of SET_BODY. */
14695 if (REG_P (shift_count
))
14697 /* Add check since it can be invoked before register
14698 allocation in pre-reload schedule. */
14699 if (reload_completed
14700 && true_regnum (set_dest
) == true_regnum (shift_count
))
14702 else if (REGNO(set_dest
) == REGNO(shift_count
))
14710 /* Return true if destination reg of SET_INSN is shift count of
14714 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
14716 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
14717 PATTERN (use_insn
));
14720 /* Return TRUE or FALSE depending on whether the unary operator meets the
14721 appropriate constraints. */
14724 ix86_unary_operator_ok (enum rtx_code
,
14728 /* If one of operands is memory, source and destination must match. */
14729 if ((MEM_P (operands
[0])
14730 || MEM_P (operands
[1]))
14731 && ! rtx_equal_p (operands
[0], operands
[1]))
14736 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
14737 are ok, keeping in mind the possible movddup alternative. */
14740 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
14742 if (MEM_P (operands
[0]))
14743 return rtx_equal_p (operands
[0], operands
[1 + high
]);
14744 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
14745 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
14749 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
14750 then replicate the value for all elements of the vector
14754 ix86_build_const_vector (machine_mode mode
, bool vect
, rtx value
)
14758 machine_mode scalar_mode
;
14782 n_elt
= GET_MODE_NUNITS (mode
);
14783 v
= rtvec_alloc (n_elt
);
14784 scalar_mode
= GET_MODE_INNER (mode
);
14786 RTVEC_ELT (v
, 0) = value
;
14788 for (i
= 1; i
< n_elt
; ++i
)
14789 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
14791 return gen_rtx_CONST_VECTOR (mode
, v
);
14794 gcc_unreachable ();
14798 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
14799 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
14800 for an SSE register. If VECT is true, then replicate the mask for
14801 all elements of the vector register. If INVERT is true, then create
14802 a mask excluding the sign bit. */
14805 ix86_build_signbit_mask (machine_mode mode
, bool vect
, bool invert
)
14807 machine_mode vec_mode
, imode
;
14835 vec_mode
= VOIDmode
;
14840 gcc_unreachable ();
14843 machine_mode inner_mode
= GET_MODE_INNER (mode
);
14844 w
= wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode
) - 1,
14845 GET_MODE_BITSIZE (inner_mode
));
14847 w
= wi::bit_not (w
);
14849 /* Force this value into the low part of a fp vector constant. */
14850 mask
= immed_wide_int_const (w
, imode
);
14851 mask
= gen_lowpart (inner_mode
, mask
);
14853 if (vec_mode
== VOIDmode
)
14854 return force_reg (inner_mode
, mask
);
14856 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
14857 return force_reg (vec_mode
, v
);
14860 /* Return TRUE or FALSE depending on whether the first SET in INSN
14861 has source and destination with matching CC modes, and that the
14862 CC mode is at least as constrained as REQ_MODE. */
14865 ix86_match_ccmode (rtx insn
, machine_mode req_mode
)
14868 machine_mode set_mode
;
14870 set
= PATTERN (insn
);
14871 if (GET_CODE (set
) == PARALLEL
)
14872 set
= XVECEXP (set
, 0, 0);
14873 gcc_assert (GET_CODE (set
) == SET
);
14874 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
14876 set_mode
= GET_MODE (SET_DEST (set
));
14880 if (req_mode
!= CCNOmode
14881 && (req_mode
!= CCmode
14882 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
14886 if (req_mode
== CCGCmode
)
14890 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
14894 if (req_mode
== CCZmode
)
14907 if (set_mode
!= req_mode
)
14912 gcc_unreachable ();
14915 return GET_MODE (SET_SRC (set
)) == set_mode
;
14919 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
14921 machine_mode mode
= GET_MODE (op0
);
14923 if (SCALAR_FLOAT_MODE_P (mode
))
14925 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
14931 /* Only zero flag is needed. */
14932 case EQ
: /* ZF=0 */
14933 case NE
: /* ZF!=0 */
14935 /* Codes needing carry flag. */
14936 case GEU
: /* CF=0 */
14937 case LTU
: /* CF=1 */
14938 /* Detect overflow checks. They need just the carry flag. */
14939 if (GET_CODE (op0
) == PLUS
14940 && (rtx_equal_p (op1
, XEXP (op0
, 0))
14941 || rtx_equal_p (op1
, XEXP (op0
, 1))))
14945 case GTU
: /* CF=0 & ZF=0 */
14946 case LEU
: /* CF=1 | ZF=1 */
14948 /* Codes possibly doable only with sign flag when
14949 comparing against zero. */
14950 case GE
: /* SF=OF or SF=0 */
14951 case LT
: /* SF<>OF or SF=1 */
14952 if (op1
== const0_rtx
)
14955 /* For other cases Carry flag is not required. */
14957 /* Codes doable only with sign flag when comparing
14958 against zero, but we miss jump instruction for it
14959 so we need to use relational tests against overflow
14960 that thus needs to be zero. */
14961 case GT
: /* ZF=0 & SF=OF */
14962 case LE
: /* ZF=1 | SF<>OF */
14963 if (op1
== const0_rtx
)
14967 /* strcmp pattern do (use flags) and combine may ask us for proper
14972 gcc_unreachable ();
14976 /* Return the fixed registers used for condition codes. */
14979 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
14982 *p2
= INVALID_REGNUM
;
14986 /* If two condition code modes are compatible, return a condition code
14987 mode which is compatible with both. Otherwise, return
14990 static machine_mode
14991 ix86_cc_modes_compatible (machine_mode m1
, machine_mode m2
)
14996 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
14999 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
15000 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
15003 if ((m1
== CCNOmode
&& m2
== CCGOCmode
)
15004 || (m1
== CCGOCmode
&& m2
== CCNOmode
))
15008 && (m2
== CCGCmode
|| m2
== CCGOCmode
|| m2
== CCNOmode
))
15010 else if (m2
== CCZmode
15011 && (m1
== CCGCmode
|| m1
== CCGOCmode
|| m1
== CCNOmode
))
15017 gcc_unreachable ();
15048 /* These are only compatible with themselves, which we already
15054 /* Return strategy to use for floating-point. We assume that fcomi is always
15055 preferrable where available, since that is also true when looking at size
15056 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
15058 enum ix86_fpcmp_strategy
15059 ix86_fp_comparison_strategy (enum rtx_code
)
15061 /* Do fcomi/sahf based test when profitable. */
15064 return IX86_FPCMP_COMI
;
15066 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
15067 return IX86_FPCMP_SAHF
;
15069 return IX86_FPCMP_ARITH
;
15072 /* Convert comparison codes we use to represent FP comparison to integer
15073 code that will result in proper branch. Return UNKNOWN if no such code
15077 ix86_fp_compare_code_to_integer (enum rtx_code code
)
15101 /* Zero extend possibly SImode EXP to Pmode register. */
15103 ix86_zero_extend_to_Pmode (rtx exp
)
15105 return force_reg (Pmode
, convert_to_mode (Pmode
, exp
, 1));
15108 /* Return true if the function being called was marked with attribute
15109 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
15110 to handle the non-PIC case in the backend because there is no easy
15111 interface for the front-end to force non-PLT calls to use the GOT.
15112 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
15113 to call the function marked "noplt" indirectly. */
15116 ix86_nopic_noplt_attribute_p (rtx call_op
)
15118 if (flag_pic
|| ix86_cmodel
== CM_LARGE
15119 || !(TARGET_64BIT
|| HAVE_AS_IX86_GOT32X
)
15120 || TARGET_MACHO
|| TARGET_SEH
|| TARGET_PECOFF
15121 || SYMBOL_REF_LOCAL_P (call_op
))
15124 tree symbol_decl
= SYMBOL_REF_DECL (call_op
);
15127 || (symbol_decl
!= NULL_TREE
15128 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl
))))
15134 /* Helper to output the jmp/call. */
15136 ix86_output_jmp_thunk_or_indirect (const char *thunk_name
, const int regno
)
15138 if (thunk_name
!= NULL
)
15140 fprintf (asm_out_file
, "\tjmp\t");
15141 assemble_name (asm_out_file
, thunk_name
);
15142 putc ('\n', asm_out_file
);
15145 output_indirect_thunk (regno
);
15148 /* Output indirect branch via a call and return thunk. CALL_OP is a
15149 register which contains the branch target. XASM is the assembly
15150 template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
15151 A normal call is converted to:
15153 call __x86_indirect_thunk_reg
15155 and a tail call is converted to:
15157 jmp __x86_indirect_thunk_reg
15161 ix86_output_indirect_branch_via_reg (rtx call_op
, bool sibcall_p
)
15163 char thunk_name_buf
[32];
15165 enum indirect_thunk_prefix need_prefix
15166 = indirect_thunk_need_prefix (current_output_insn
);
15167 int regno
= REGNO (call_op
);
15169 if (cfun
->machine
->indirect_branch_type
15170 != indirect_branch_thunk_inline
)
15172 if (cfun
->machine
->indirect_branch_type
== indirect_branch_thunk
)
15175 if (i
>= FIRST_REX_INT_REG
)
15176 i
-= (FIRST_REX_INT_REG
- LAST_INT_REG
- 1);
15177 indirect_thunks_used
|= 1 << i
;
15179 indirect_thunk_name (thunk_name_buf
, regno
, need_prefix
, false);
15180 thunk_name
= thunk_name_buf
;
15186 ix86_output_jmp_thunk_or_indirect (thunk_name
, regno
);
15189 if (thunk_name
!= NULL
)
15191 fprintf (asm_out_file
, "\tcall\t");
15192 assemble_name (asm_out_file
, thunk_name
);
15193 putc ('\n', asm_out_file
);
15197 char indirectlabel1
[32];
15198 char indirectlabel2
[32];
15200 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1
,
15202 indirectlabelno
++);
15203 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2
,
15205 indirectlabelno
++);
15208 fputs ("\tjmp\t", asm_out_file
);
15209 assemble_name_raw (asm_out_file
, indirectlabel2
);
15210 fputc ('\n', asm_out_file
);
15212 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel1
);
15214 ix86_output_jmp_thunk_or_indirect (thunk_name
, regno
);
15216 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel2
);
15219 fputs ("\tcall\t", asm_out_file
);
15220 assemble_name_raw (asm_out_file
, indirectlabel1
);
15221 fputc ('\n', asm_out_file
);
15225 /* Output indirect branch via a call and return thunk. CALL_OP is
15226 the branch target. XASM is the assembly template for CALL_OP.
15227 Branch is a tail call if SIBCALL_P is true. A normal call is
15233 jmp __x86_indirect_thunk
15237 and a tail call is converted to:
15240 jmp __x86_indirect_thunk
15244 ix86_output_indirect_branch_via_push (rtx call_op
, const char *xasm
,
15247 char thunk_name_buf
[32];
15250 enum indirect_thunk_prefix need_prefix
15251 = indirect_thunk_need_prefix (current_output_insn
);
15254 if (cfun
->machine
->indirect_branch_type
15255 != indirect_branch_thunk_inline
)
15257 if (cfun
->machine
->indirect_branch_type
== indirect_branch_thunk
)
15258 indirect_thunk_needed
= true;
15259 indirect_thunk_name (thunk_name_buf
, regno
, need_prefix
, false);
15260 thunk_name
= thunk_name_buf
;
15265 snprintf (push_buf
, sizeof (push_buf
), "push{%c}\t%s",
15266 TARGET_64BIT
? 'q' : 'l', xasm
);
15270 output_asm_insn (push_buf
, &call_op
);
15271 ix86_output_jmp_thunk_or_indirect (thunk_name
, regno
);
15275 char indirectlabel1
[32];
15276 char indirectlabel2
[32];
15278 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1
,
15280 indirectlabelno
++);
15281 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2
,
15283 indirectlabelno
++);
15286 fputs ("\tjmp\t", asm_out_file
);
15287 assemble_name_raw (asm_out_file
, indirectlabel2
);
15288 fputc ('\n', asm_out_file
);
15290 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel1
);
15292 /* An external function may be called via GOT, instead of PLT. */
15293 if (MEM_P (call_op
))
15295 struct ix86_address parts
;
15296 rtx addr
= XEXP (call_op
, 0);
15297 if (ix86_decompose_address (addr
, &parts
)
15298 && parts
.base
== stack_pointer_rtx
)
15300 /* Since call will adjust stack by -UNITS_PER_WORD,
15301 we must convert "disp(stack, index, scale)" to
15302 "disp+UNITS_PER_WORD(stack, index, scale)". */
15305 addr
= gen_rtx_MULT (Pmode
, parts
.index
,
15306 GEN_INT (parts
.scale
));
15307 addr
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
15311 addr
= stack_pointer_rtx
;
15314 if (parts
.disp
!= NULL_RTX
)
15315 disp
= plus_constant (Pmode
, parts
.disp
,
15318 disp
= GEN_INT (UNITS_PER_WORD
);
15320 addr
= gen_rtx_PLUS (Pmode
, addr
, disp
);
15321 call_op
= gen_rtx_MEM (GET_MODE (call_op
), addr
);
15325 output_asm_insn (push_buf
, &call_op
);
15327 ix86_output_jmp_thunk_or_indirect (thunk_name
, regno
);
15329 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel2
);
15332 fputs ("\tcall\t", asm_out_file
);
15333 assemble_name_raw (asm_out_file
, indirectlabel1
);
15334 fputc ('\n', asm_out_file
);
15338 /* Output indirect branch via a call and return thunk. CALL_OP is
15339 the branch target. XASM is the assembly template for CALL_OP.
15340 Branch is a tail call if SIBCALL_P is true. */
15343 ix86_output_indirect_branch (rtx call_op
, const char *xasm
,
15346 if (REG_P (call_op
))
15347 ix86_output_indirect_branch_via_reg (call_op
, sibcall_p
);
15349 ix86_output_indirect_branch_via_push (call_op
, xasm
, sibcall_p
);
15352 /* Output indirect jump. CALL_OP is the jump target. */
15355 ix86_output_indirect_jmp (rtx call_op
)
15357 if (cfun
->machine
->indirect_branch_type
!= indirect_branch_keep
)
15359 /* We can't have red-zone since "call" in the indirect thunk
15360 pushes the return address onto stack, destroying red-zone. */
15361 if (ix86_red_zone_size
!= 0)
15362 gcc_unreachable ();
15364 ix86_output_indirect_branch (call_op
, "%0", true);
15368 return "%!jmp\t%A0";
15371 /* Output return instrumentation for current function if needed. */
15374 output_return_instrumentation (void)
15376 if (ix86_instrument_return
!= instrument_return_none
15378 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun
->decl
))
15380 if (ix86_flag_record_return
)
15381 fprintf (asm_out_file
, "1:\n");
15382 switch (ix86_instrument_return
)
15384 case instrument_return_call
:
15385 fprintf (asm_out_file
, "\tcall\t__return__\n");
15387 case instrument_return_nop5
:
15388 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
15389 fprintf (asm_out_file
, ASM_BYTE
"0x0f, 0x1f, 0x44, 0x00, 0x00\n");
15391 case instrument_return_none
:
15395 if (ix86_flag_record_return
)
15397 fprintf (asm_out_file
, "\t.section __return_loc, \"a\",@progbits\n");
15398 fprintf (asm_out_file
, "\t.%s 1b\n", TARGET_64BIT
? "quad" : "long");
15399 fprintf (asm_out_file
, "\t.previous\n");
15404 /* Output function return. CALL_OP is the jump target. Add a REP
15405 prefix to RET if LONG_P is true and function return is kept. */
15408 ix86_output_function_return (bool long_p
)
15410 output_return_instrumentation ();
15412 if (cfun
->machine
->function_return_type
!= indirect_branch_keep
)
15414 char thunk_name
[32];
15415 enum indirect_thunk_prefix need_prefix
15416 = indirect_thunk_need_prefix (current_output_insn
);
15418 if (cfun
->machine
->function_return_type
15419 != indirect_branch_thunk_inline
)
15421 bool need_thunk
= (cfun
->machine
->function_return_type
15422 == indirect_branch_thunk
);
15423 indirect_thunk_name (thunk_name
, INVALID_REGNUM
, need_prefix
,
15425 indirect_return_needed
|= need_thunk
;
15426 fprintf (asm_out_file
, "\tjmp\t");
15427 assemble_name (asm_out_file
, thunk_name
);
15428 putc ('\n', asm_out_file
);
15431 output_indirect_thunk (INVALID_REGNUM
);
15439 return "rep%; ret";
15442 /* Output indirect function return. RET_OP is the function return
15446 ix86_output_indirect_function_return (rtx ret_op
)
15448 if (cfun
->machine
->function_return_type
!= indirect_branch_keep
)
15450 char thunk_name
[32];
15451 enum indirect_thunk_prefix need_prefix
15452 = indirect_thunk_need_prefix (current_output_insn
);
15453 unsigned int regno
= REGNO (ret_op
);
15454 gcc_assert (regno
== CX_REG
);
15456 if (cfun
->machine
->function_return_type
15457 != indirect_branch_thunk_inline
)
15459 bool need_thunk
= (cfun
->machine
->function_return_type
15460 == indirect_branch_thunk
);
15461 indirect_thunk_name (thunk_name
, regno
, need_prefix
, true);
15465 indirect_return_via_cx
= true;
15466 indirect_thunks_used
|= 1 << CX_REG
;
15468 fprintf (asm_out_file
, "\tjmp\t");
15469 assemble_name (asm_out_file
, thunk_name
);
15470 putc ('\n', asm_out_file
);
15473 output_indirect_thunk (regno
);
15478 return "%!jmp\t%A0";
15481 /* Output the assembly for a call instruction. */
15484 ix86_output_call_insn (rtx_insn
*insn
, rtx call_op
)
15486 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
15487 bool output_indirect_p
15489 && cfun
->machine
->indirect_branch_type
!= indirect_branch_keep
);
15490 bool seh_nop_p
= false;
15493 if (SIBLING_CALL_P (insn
))
15495 output_return_instrumentation ();
15498 if (ix86_nopic_noplt_attribute_p (call_op
))
15503 if (output_indirect_p
)
15504 xasm
= "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15506 xasm
= "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15510 if (output_indirect_p
)
15511 xasm
= "{%p0@GOT|[DWORD PTR %p0@GOT]}";
15513 xasm
= "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
15517 xasm
= "%!jmp\t%P0";
15519 /* SEH epilogue detection requires the indirect branch case
15520 to include REX.W. */
15521 else if (TARGET_SEH
)
15522 xasm
= "%!rex.W jmp\t%A0";
15525 if (output_indirect_p
)
15528 xasm
= "%!jmp\t%A0";
15531 if (output_indirect_p
&& !direct_p
)
15532 ix86_output_indirect_branch (call_op
, xasm
, true);
15534 output_asm_insn (xasm
, &call_op
);
15538 /* SEH unwinding can require an extra nop to be emitted in several
15539 circumstances. Determine if we have one of those. */
15544 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
15546 /* Prevent a catch region from being adjacent to a jump that would
15547 be interpreted as an epilogue sequence by the unwinder. */
15548 if (JUMP_P(i
) && CROSSING_JUMP_P (i
))
15554 /* If we get to another real insn, we don't need the nop. */
15558 /* If we get to the epilogue note, prevent a catch region from
15559 being adjacent to the standard epilogue sequence. If non-
15560 call-exceptions, we'll have done this during epilogue emission. */
15561 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
15562 && !flag_non_call_exceptions
15563 && !can_throw_internal (insn
))
15570 /* If we didn't find a real insn following the call, prevent the
15571 unwinder from looking into the next function. */
15578 if (ix86_nopic_noplt_attribute_p (call_op
))
15583 if (output_indirect_p
)
15584 xasm
= "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15586 xasm
= "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15590 if (output_indirect_p
)
15591 xasm
= "{%p0@GOT|[DWORD PTR %p0@GOT]}";
15593 xasm
= "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
15597 xasm
= "%!call\t%P0";
15601 if (output_indirect_p
)
15604 xasm
= "%!call\t%A0";
15607 if (output_indirect_p
&& !direct_p
)
15608 ix86_output_indirect_branch (call_op
, xasm
, false);
15610 output_asm_insn (xasm
, &call_op
);
15618 /* Return a MEM corresponding to a stack slot with mode MODE.
15619 Allocate a new slot if necessary.
15621 The RTL for a function can have several slots available: N is
15622 which slot to use. */
15625 assign_386_stack_local (machine_mode mode
, enum ix86_stack_slot n
)
15627 struct stack_local_entry
*s
;
15629 gcc_assert (n
< MAX_386_STACK_LOCALS
);
15631 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
15632 if (s
->mode
== mode
&& s
->n
== n
)
15633 return validize_mem (copy_rtx (s
->rtl
));
15635 s
= ggc_alloc
<stack_local_entry
> ();
15638 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
15640 s
->next
= ix86_stack_locals
;
15641 ix86_stack_locals
= s
;
15642 return validize_mem (copy_rtx (s
->rtl
));
15646 ix86_instantiate_decls (void)
15648 struct stack_local_entry
*s
;
15650 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
15651 if (s
->rtl
!= NULL_RTX
)
15652 instantiate_decl_rtl (s
->rtl
);
15655 /* Check whether x86 address PARTS is a pc-relative address. */
15658 ix86_rip_relative_addr_p (struct ix86_address
*parts
)
15660 rtx base
, index
, disp
;
15662 base
= parts
->base
;
15663 index
= parts
->index
;
15664 disp
= parts
->disp
;
15666 if (disp
&& !base
&& !index
)
15672 if (GET_CODE (disp
) == CONST
)
15673 symbol
= XEXP (disp
, 0);
15674 if (GET_CODE (symbol
) == PLUS
15675 && CONST_INT_P (XEXP (symbol
, 1)))
15676 symbol
= XEXP (symbol
, 0);
15678 if (GET_CODE (symbol
) == LABEL_REF
15679 || (GET_CODE (symbol
) == SYMBOL_REF
15680 && SYMBOL_REF_TLS_MODEL (symbol
) == 0)
15681 || (GET_CODE (symbol
) == UNSPEC
15682 && (XINT (symbol
, 1) == UNSPEC_GOTPCREL
15683 || XINT (symbol
, 1) == UNSPEC_PCREL
15684 || XINT (symbol
, 1) == UNSPEC_GOTNTPOFF
)))
15691 /* Calculate the length of the memory address in the instruction encoding.
15692 Includes addr32 prefix, does not include the one-byte modrm, opcode,
15693 or other prefixes. We never generate addr32 prefix for LEA insn. */
15696 memory_address_length (rtx addr
, bool lea
)
15698 struct ix86_address parts
;
15699 rtx base
, index
, disp
;
15703 if (GET_CODE (addr
) == PRE_DEC
15704 || GET_CODE (addr
) == POST_INC
15705 || GET_CODE (addr
) == PRE_MODIFY
15706 || GET_CODE (addr
) == POST_MODIFY
)
15709 ok
= ix86_decompose_address (addr
, &parts
);
15712 len
= (parts
.seg
== ADDR_SPACE_GENERIC
) ? 0 : 1;
15714 /* If this is not LEA instruction, add the length of addr32 prefix. */
15715 if (TARGET_64BIT
&& !lea
15716 && (SImode_address_operand (addr
, VOIDmode
)
15717 || (parts
.base
&& GET_MODE (parts
.base
) == SImode
)
15718 || (parts
.index
&& GET_MODE (parts
.index
) == SImode
)))
15722 index
= parts
.index
;
15725 if (base
&& SUBREG_P (base
))
15726 base
= SUBREG_REG (base
);
15727 if (index
&& SUBREG_P (index
))
15728 index
= SUBREG_REG (index
);
15730 gcc_assert (base
== NULL_RTX
|| REG_P (base
));
15731 gcc_assert (index
== NULL_RTX
|| REG_P (index
));
15734 - esp as the base always wants an index,
15735 - ebp as the base always wants a displacement,
15736 - r12 as the base always wants an index,
15737 - r13 as the base always wants a displacement. */
15739 /* Register Indirect. */
15740 if (base
&& !index
&& !disp
)
15742 /* esp (for its index) and ebp (for its displacement) need
15743 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
15745 if (base
== arg_pointer_rtx
15746 || base
== frame_pointer_rtx
15747 || REGNO (base
) == SP_REG
15748 || REGNO (base
) == BP_REG
15749 || REGNO (base
) == R12_REG
15750 || REGNO (base
) == R13_REG
)
15754 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
15755 is not disp32, but disp32(%rip), so for disp32
15756 SIB byte is needed, unless print_operand_address
15757 optimizes it into disp32(%rip) or (%rip) is implied
15759 else if (disp
&& !base
&& !index
)
15762 if (!ix86_rip_relative_addr_p (&parts
))
15767 /* Find the length of the displacement constant. */
15770 if (base
&& satisfies_constraint_K (disp
))
15775 /* ebp always wants a displacement. Similarly r13. */
15776 else if (base
&& (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
15779 /* An index requires the two-byte modrm form.... */
15781 /* ...like esp (or r12), which always wants an index. */
15782 || base
== arg_pointer_rtx
15783 || base
== frame_pointer_rtx
15784 || (base
&& (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
15791 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15792 is set, expect that insn have 8bit immediate alternative. */
15794 ix86_attr_length_immediate_default (rtx_insn
*insn
, bool shortform
)
15798 extract_insn_cached (insn
);
15799 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15800 if (CONSTANT_P (recog_data
.operand
[i
]))
15802 enum attr_mode mode
= get_attr_mode (insn
);
15805 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
15807 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
15814 ival
= trunc_int_for_mode (ival
, HImode
);
15817 ival
= trunc_int_for_mode (ival
, SImode
);
15822 if (IN_RANGE (ival
, -128, 127))
15839 /* Immediates for DImode instructions are encoded
15840 as 32bit sign extended values. */
15845 fatal_insn ("unknown insn mode", insn
);
15851 /* Compute default value for "length_address" attribute. */
15853 ix86_attr_length_address_default (rtx_insn
*insn
)
15857 if (get_attr_type (insn
) == TYPE_LEA
)
15859 rtx set
= PATTERN (insn
), addr
;
15861 if (GET_CODE (set
) == PARALLEL
)
15862 set
= XVECEXP (set
, 0, 0);
15864 gcc_assert (GET_CODE (set
) == SET
);
15866 addr
= SET_SRC (set
);
15868 return memory_address_length (addr
, true);
15871 extract_insn_cached (insn
);
15872 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15874 rtx op
= recog_data
.operand
[i
];
15877 constrain_operands_cached (insn
, reload_completed
);
15878 if (which_alternative
!= -1)
15880 const char *constraints
= recog_data
.constraints
[i
];
15881 int alt
= which_alternative
;
15883 while (*constraints
== '=' || *constraints
== '+')
15886 while (*constraints
++ != ',')
15888 /* Skip ignored operands. */
15889 if (*constraints
== 'X')
15893 int len
= memory_address_length (XEXP (op
, 0), false);
15895 /* Account for segment prefix for non-default addr spaces. */
15896 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op
)))
15905 /* Compute default value for "length_vex" attribute. It includes
15906 2 or 3 byte VEX prefix and 1 opcode byte. */
15909 ix86_attr_length_vex_default (rtx_insn
*insn
, bool has_0f_opcode
,
15914 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
15915 byte VEX prefix. */
15916 if (!has_0f_opcode
|| has_vex_w
)
15919 /* We can always use 2 byte VEX prefix in 32bit. */
15923 extract_insn_cached (insn
);
15925 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15926 if (REG_P (recog_data
.operand
[i
]))
15928 /* REX.W bit uses 3 byte VEX prefix. */
15929 if (GET_MODE (recog_data
.operand
[i
]) == DImode
15930 && GENERAL_REG_P (recog_data
.operand
[i
]))
15935 /* REX.X or REX.B bits use 3 byte VEX prefix. */
15936 if (MEM_P (recog_data
.operand
[i
])
15937 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
15946 ix86_class_likely_spilled_p (reg_class_t
);
15948 /* Returns true if lhs of insn is HW function argument register and set up
15949 is_spilled to true if it is likely spilled HW register. */
15951 insn_is_function_arg (rtx insn
, bool* is_spilled
)
15955 if (!NONDEBUG_INSN_P (insn
))
15957 /* Call instructions are not movable, ignore it. */
15960 insn
= PATTERN (insn
);
15961 if (GET_CODE (insn
) == PARALLEL
)
15962 insn
= XVECEXP (insn
, 0, 0);
15963 if (GET_CODE (insn
) != SET
)
15965 dst
= SET_DEST (insn
);
15966 if (REG_P (dst
) && HARD_REGISTER_P (dst
)
15967 && ix86_function_arg_regno_p (REGNO (dst
)))
15969 /* Is it likely spilled HW register? */
15970 if (!TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (dst
))
15971 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst
))))
15972 *is_spilled
= true;
15978 /* Add output dependencies for chain of function adjacent arguments if only
15979 there is a move to likely spilled HW register. Return first argument
15980 if at least one dependence was added or NULL otherwise. */
15982 add_parameter_dependencies (rtx_insn
*call
, rtx_insn
*head
)
15985 rtx_insn
*last
= call
;
15986 rtx_insn
*first_arg
= NULL
;
15987 bool is_spilled
= false;
15989 head
= PREV_INSN (head
);
15991 /* Find nearest to call argument passing instruction. */
15994 last
= PREV_INSN (last
);
15997 if (!NONDEBUG_INSN_P (last
))
15999 if (insn_is_function_arg (last
, &is_spilled
))
16007 insn
= PREV_INSN (last
);
16008 if (!INSN_P (insn
))
16012 if (!NONDEBUG_INSN_P (insn
))
16017 if (insn_is_function_arg (insn
, &is_spilled
))
16019 /* Add output depdendence between two function arguments if chain
16020 of output arguments contains likely spilled HW registers. */
16022 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
16023 first_arg
= last
= insn
;
16033 /* Add output or anti dependency from insn to first_arg to restrict its code
16036 avoid_func_arg_motion (rtx_insn
*first_arg
, rtx_insn
*insn
)
16041 set
= single_set (insn
);
16044 tmp
= SET_DEST (set
);
16047 /* Add output dependency to the first function argument. */
16048 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
16051 /* Add anti dependency. */
16052 add_dependence (first_arg
, insn
, REG_DEP_ANTI
);
16055 /* Avoid cross block motion of function argument through adding dependency
16056 from the first non-jump instruction in bb. */
16058 add_dependee_for_func_arg (rtx_insn
*arg
, basic_block bb
)
16060 rtx_insn
*insn
= BB_END (bb
);
16064 if (NONDEBUG_INSN_P (insn
) && NONJUMP_INSN_P (insn
))
16066 rtx set
= single_set (insn
);
16069 avoid_func_arg_motion (arg
, insn
);
16073 if (insn
== BB_HEAD (bb
))
16075 insn
= PREV_INSN (insn
);
16079 /* Hook for pre-reload schedule - avoid motion of function arguments
16080 passed in likely spilled HW registers. */
16082 ix86_dependencies_evaluation_hook (rtx_insn
*head
, rtx_insn
*tail
)
16085 rtx_insn
*first_arg
= NULL
;
16086 if (reload_completed
)
16088 while (head
!= tail
&& DEBUG_INSN_P (head
))
16089 head
= NEXT_INSN (head
);
16090 for (insn
= tail
; insn
!= head
; insn
= PREV_INSN (insn
))
16091 if (INSN_P (insn
) && CALL_P (insn
))
16093 first_arg
= add_parameter_dependencies (insn
, head
);
16096 /* Add dependee for first argument to predecessors if only
16097 region contains more than one block. */
16098 basic_block bb
= BLOCK_FOR_INSN (insn
);
16099 int rgn
= CONTAINING_RGN (bb
->index
);
16100 int nr_blks
= RGN_NR_BLOCKS (rgn
);
16101 /* Skip trivial regions and region head blocks that can have
16102 predecessors outside of region. */
16103 if (nr_blks
> 1 && BLOCK_TO_BB (bb
->index
) != 0)
16108 /* Regions are SCCs with the exception of selective
16109 scheduling with pipelining of outer blocks enabled.
16110 So also check that immediate predecessors of a non-head
16111 block are in the same region. */
16112 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
16114 /* Avoid creating of loop-carried dependencies through
16115 using topological ordering in the region. */
16116 if (rgn
== CONTAINING_RGN (e
->src
->index
)
16117 && BLOCK_TO_BB (bb
->index
) > BLOCK_TO_BB (e
->src
->index
))
16118 add_dependee_for_func_arg (first_arg
, e
->src
);
16126 else if (first_arg
)
16127 avoid_func_arg_motion (first_arg
, insn
);
16130 /* Hook for pre-reload schedule - set priority of moves from likely spilled
16131 HW registers to maximum, to schedule them at soon as possible. These are
16132 moves from function argument registers at the top of the function entry
16133 and moves from function return value registers after call. */
16135 ix86_adjust_priority (rtx_insn
*insn
, int priority
)
16139 if (reload_completed
)
16142 if (!NONDEBUG_INSN_P (insn
))
16145 set
= single_set (insn
);
16148 rtx tmp
= SET_SRC (set
);
16150 && HARD_REGISTER_P (tmp
)
16151 && !TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (tmp
))
16152 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp
))))
16153 return current_sched_info
->sched_max_insns_priority
;
16159 /* Prepare for scheduling pass. */
16161 ix86_sched_init_global (FILE *, int, int)
16163 /* Install scheduling hooks for current CPU. Some of these hooks are used
16164 in time-critical parts of the scheduler, so we only set them up when
16165 they are actually used. */
16168 case PROCESSOR_CORE2
:
16169 case PROCESSOR_NEHALEM
:
16170 case PROCESSOR_SANDYBRIDGE
:
16171 case PROCESSOR_HASWELL
:
16172 case PROCESSOR_GENERIC
:
16173 /* Do not perform multipass scheduling for pre-reload schedule
16174 to save compile time. */
16175 if (reload_completed
)
16177 ix86_core2i7_init_hooks ();
16180 /* Fall through. */
16182 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
16183 targetm
.sched
.first_cycle_multipass_init
= NULL
;
16184 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
16185 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
16186 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
16187 targetm
.sched
.first_cycle_multipass_end
= NULL
;
16188 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
16194 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
16196 static HOST_WIDE_INT
16197 ix86_static_rtx_alignment (machine_mode mode
)
16199 if (mode
== DFmode
)
16201 if (ALIGN_MODE_128 (mode
))
16202 return MAX (128, GET_MODE_ALIGNMENT (mode
));
16203 return GET_MODE_ALIGNMENT (mode
);
16206 /* Implement TARGET_CONSTANT_ALIGNMENT. */
16208 static HOST_WIDE_INT
16209 ix86_constant_alignment (const_tree exp
, HOST_WIDE_INT align
)
16211 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
16212 || TREE_CODE (exp
) == INTEGER_CST
)
16214 machine_mode mode
= TYPE_MODE (TREE_TYPE (exp
));
16215 HOST_WIDE_INT mode_align
= ix86_static_rtx_alignment (mode
);
16216 return MAX (mode_align
, align
);
16218 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
16219 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
16220 return BITS_PER_WORD
;
16225 /* Implement TARGET_EMPTY_RECORD_P. */
16228 ix86_is_empty_record (const_tree type
)
16232 return default_is_empty_record (type
);
16235 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
16238 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v
, tree type
)
16240 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
16242 if (!cum
->warn_empty
)
16245 if (!TYPE_EMPTY_P (type
))
16248 /* Don't warn if the function isn't visible outside of the TU. */
16249 if (cum
->decl
&& !TREE_PUBLIC (cum
->decl
))
16252 const_tree ctx
= get_ultimate_context (cum
->decl
);
16253 if (ctx
!= NULL_TREE
16254 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx
))
16257 /* If the actual size of the type is zero, then there is no change
16258 in how objects of this size are passed. */
16259 if (int_size_in_bytes (type
) == 0)
16262 warning (OPT_Wabi
, "empty class %qT parameter passing ABI "
16263 "changes in %<-fabi-version=12%> (GCC 8)", type
);
16265 /* Only warn once. */
16266 cum
->warn_empty
= false;
16269 /* This hook returns name of multilib ABI. */
16271 static const char *
16272 ix86_get_multilib_abi_name (void)
16274 if (!(TARGET_64BIT_P (ix86_isa_flags
)))
16276 else if (TARGET_X32_P (ix86_isa_flags
))
16282 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
16283 the data type, and ALIGN is the alignment that the object would
16284 ordinarily have. */
16287 iamcu_alignment (tree type
, int align
)
16291 if (align
< 32 || TYPE_USER_ALIGN (type
))
16294 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
16296 mode
= TYPE_MODE (strip_array_types (type
));
16297 switch (GET_MODE_CLASS (mode
))
16300 case MODE_COMPLEX_INT
:
16301 case MODE_COMPLEX_FLOAT
:
16303 case MODE_DECIMAL_FLOAT
:
16310 /* Compute the alignment for a static variable.
16311 TYPE is the data type, and ALIGN is the alignment that
16312 the object would ordinarily have. The value of this function is used
16313 instead of that alignment to align the object. */
16316 ix86_data_alignment (tree type
, unsigned int align
, bool opt
)
16318 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
16319 for symbols from other compilation units or symbols that don't need
16320 to bind locally. In order to preserve some ABI compatibility with
16321 those compilers, ensure we don't decrease alignment from what we
16324 unsigned int max_align_compat
= MIN (256, MAX_OFILE_ALIGNMENT
);
16326 /* A data structure, equal or greater than the size of a cache line
16327 (64 bytes in the Pentium 4 and other recent Intel processors, including
16328 processors based on Intel Core microarchitecture) should be aligned
16329 so that its base address is a multiple of a cache line size. */
16331 unsigned int max_align
16332 = MIN ((unsigned) ix86_tune_cost
->prefetch_block
* 8, MAX_OFILE_ALIGNMENT
);
16334 if (max_align
< BITS_PER_WORD
)
16335 max_align
= BITS_PER_WORD
;
16337 switch (ix86_align_data_type
)
16339 case ix86_align_data_type_abi
: opt
= false; break;
16340 case ix86_align_data_type_compat
: max_align
= BITS_PER_WORD
; break;
16341 case ix86_align_data_type_cacheline
: break;
16345 align
= iamcu_alignment (type
, align
);
16348 && AGGREGATE_TYPE_P (type
)
16349 && TYPE_SIZE (type
)
16350 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
)
16352 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type
)), max_align_compat
)
16353 && align
< max_align_compat
)
16354 align
= max_align_compat
;
16355 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type
)), max_align
)
16356 && align
< max_align
)
16360 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16361 to 16byte boundary. */
16364 if ((opt
? AGGREGATE_TYPE_P (type
) : TREE_CODE (type
) == ARRAY_TYPE
)
16365 && TYPE_SIZE (type
)
16366 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16367 && wi::geu_p (wi::to_wide (TYPE_SIZE (type
)), 128)
16375 if (TREE_CODE (type
) == ARRAY_TYPE
)
16377 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
16379 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
16382 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
16385 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
16387 if ((TYPE_MODE (type
) == XCmode
16388 || TYPE_MODE (type
) == TCmode
) && align
< 128)
16391 else if ((TREE_CODE (type
) == RECORD_TYPE
16392 || TREE_CODE (type
) == UNION_TYPE
16393 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
16394 && TYPE_FIELDS (type
))
16396 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
16398 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
16401 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
16402 || TREE_CODE (type
) == INTEGER_TYPE
)
16404 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
16406 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
16413 /* Compute the alignment for a local variable or a stack slot. EXP is
16414 the data type or decl itself, MODE is the widest mode available and
16415 ALIGN is the alignment that the object would ordinarily have. The
16416 value of this macro is used instead of that alignment to align the
16420 ix86_local_alignment (tree exp
, machine_mode mode
,
16421 unsigned int align
)
16425 if (exp
&& DECL_P (exp
))
16427 type
= TREE_TYPE (exp
);
16436 /* Don't do dynamic stack realignment for long long objects with
16437 -mpreferred-stack-boundary=2. */
16440 && ix86_preferred_stack_boundary
< 64
16441 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
16442 && (!type
|| !TYPE_USER_ALIGN (type
))
16443 && (!decl
|| !DECL_USER_ALIGN (decl
)))
16446 /* If TYPE is NULL, we are allocating a stack slot for caller-save
16447 register in MODE. We will return the largest alignment of XF
16451 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
16452 align
= GET_MODE_ALIGNMENT (DFmode
);
16456 /* Don't increase alignment for Intel MCU psABI. */
16460 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16461 to 16byte boundary. Exact wording is:
16463 An array uses the same alignment as its elements, except that a local or
16464 global array variable of length at least 16 bytes or
16465 a C99 variable-length array variable always has alignment of at least 16 bytes.
16467 This was added to allow use of aligned SSE instructions at arrays. This
16468 rule is meant for static storage (where compiler cannot do the analysis
16469 by itself). We follow it for automatic variables only when convenient.
16470 We fully control everything in the function compiled and functions from
16471 other unit cannot rely on the alignment.
16473 Exclude va_list type. It is the common case of local array where
16474 we cannot benefit from the alignment.
16476 TODO: Probably one should optimize for size only when var is not escaping. */
16477 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
16480 if (AGGREGATE_TYPE_P (type
)
16481 && (va_list_type_node
== NULL_TREE
16482 || (TYPE_MAIN_VARIANT (type
)
16483 != TYPE_MAIN_VARIANT (va_list_type_node
)))
16484 && TYPE_SIZE (type
)
16485 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16486 && wi::geu_p (wi::to_wide (TYPE_SIZE (type
)), 128)
16490 if (TREE_CODE (type
) == ARRAY_TYPE
)
16492 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
16494 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
16497 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
16499 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
16501 if ((TYPE_MODE (type
) == XCmode
16502 || TYPE_MODE (type
) == TCmode
) && align
< 128)
16505 else if ((TREE_CODE (type
) == RECORD_TYPE
16506 || TREE_CODE (type
) == UNION_TYPE
16507 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
16508 && TYPE_FIELDS (type
))
16510 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
16512 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
16515 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
16516 || TREE_CODE (type
) == INTEGER_TYPE
)
16519 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
16521 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
16527 /* Compute the minimum required alignment for dynamic stack realignment
16528 purposes for a local variable, parameter or a stack slot. EXP is
16529 the data type or decl itself, MODE is its mode and ALIGN is the
16530 alignment that the object would ordinarily have. */
16533 ix86_minimum_alignment (tree exp
, machine_mode mode
,
16534 unsigned int align
)
16538 if (exp
&& DECL_P (exp
))
16540 type
= TREE_TYPE (exp
);
16549 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
16552 /* Don't do dynamic stack realignment for long long objects with
16553 -mpreferred-stack-boundary=2. */
16554 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
16555 && (!type
|| !TYPE_USER_ALIGN (type
))
16556 && (!decl
|| !DECL_USER_ALIGN (decl
)))
16558 gcc_checking_assert (!TARGET_STV
);
16565 /* Find a location for the static chain incoming to a nested function.
16566 This is a register, unless all free registers are used by arguments. */
16569 ix86_static_chain (const_tree fndecl_or_type
, bool incoming_p
)
16575 /* We always use R10 in 64-bit mode. */
16580 const_tree fntype
, fndecl
;
16583 /* By default in 32-bit mode we use ECX to pass the static chain. */
16586 if (TREE_CODE (fndecl_or_type
) == FUNCTION_DECL
)
16588 fntype
= TREE_TYPE (fndecl_or_type
);
16589 fndecl
= fndecl_or_type
;
16593 fntype
= fndecl_or_type
;
16597 ccvt
= ix86_get_callcvt (fntype
);
16598 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
16600 /* Fastcall functions use ecx/edx for arguments, which leaves
16601 us with EAX for the static chain.
16602 Thiscall functions use ecx for arguments, which also
16603 leaves us with EAX for the static chain. */
16606 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
16608 /* Thiscall functions use ecx for arguments, which leaves
16609 us with EAX and EDX for the static chain.
16610 We are using for abi-compatibility EAX. */
16613 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
16615 /* For regparm 3, we have no free call-clobbered registers in
16616 which to store the static chain. In order to implement this,
16617 we have the trampoline push the static chain to the stack.
16618 However, we can't push a value below the return address when
16619 we call the nested function directly, so we have to use an
16620 alternate entry point. For this we use ESI, and have the
16621 alternate entry point push ESI, so that things appear the
16622 same once we're executing the nested function. */
16625 if (fndecl
== current_function_decl
16626 && !ix86_static_chain_on_stack
)
16628 gcc_assert (!reload_completed
);
16629 ix86_static_chain_on_stack
= true;
16631 return gen_frame_mem (SImode
,
16632 plus_constant (Pmode
,
16633 arg_pointer_rtx
, -8));
16639 return gen_rtx_REG (Pmode
, regno
);
16642 /* Emit RTL insns to initialize the variable parts of a trampoline.
16643 FNDECL is the decl of the target address; M_TRAMP is a MEM for
16644 the trampoline, and CHAIN_VALUE is an RTX for the static chain
16645 to be passed to the target function. */
16648 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
16653 bool need_endbr
= (flag_cf_protection
& CF_BRANCH
);
16655 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
16663 /* Insert ENDBR64. */
16664 mem
= adjust_address (m_tramp
, SImode
, offset
);
16665 emit_move_insn (mem
, gen_int_mode (0xfa1e0ff3, SImode
));
16669 /* Load the function address to r11. Try to load address using
16670 the shorter movl instead of movabs. We may want to support
16671 movq for kernel mode, but kernel does not use trampolines at
16672 the moment. FNADDR is a 32bit address and may not be in
16673 DImode when ptr_mode == SImode. Always use movl in this
16675 if (ptr_mode
== SImode
16676 || x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
16678 fnaddr
= copy_addr_to_reg (fnaddr
);
16680 mem
= adjust_address (m_tramp
, HImode
, offset
);
16681 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
16683 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
16684 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
16689 mem
= adjust_address (m_tramp
, HImode
, offset
);
16690 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
16692 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
16693 emit_move_insn (mem
, fnaddr
);
16697 /* Load static chain using movabs to r10. Use the shorter movl
16698 instead of movabs when ptr_mode == SImode. */
16699 if (ptr_mode
== SImode
)
16710 mem
= adjust_address (m_tramp
, HImode
, offset
);
16711 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
16713 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
16714 emit_move_insn (mem
, chain_value
);
16717 /* Jump to r11; the last (unused) byte is a nop, only there to
16718 pad the write out to a single 32-bit store. */
16719 mem
= adjust_address (m_tramp
, SImode
, offset
);
16720 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
16727 /* Depending on the static chain location, either load a register
16728 with a constant, or push the constant to the stack. All of the
16729 instructions are the same size. */
16730 chain
= ix86_static_chain (fndecl
, true);
16733 switch (REGNO (chain
))
16736 opcode
= 0xb8; break;
16738 opcode
= 0xb9; break;
16740 gcc_unreachable ();
16748 /* Insert ENDBR32. */
16749 mem
= adjust_address (m_tramp
, SImode
, offset
);
16750 emit_move_insn (mem
, gen_int_mode (0xfb1e0ff3, SImode
));
16754 mem
= adjust_address (m_tramp
, QImode
, offset
);
16755 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
16757 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
16758 emit_move_insn (mem
, chain_value
);
16761 mem
= adjust_address (m_tramp
, QImode
, offset
);
16762 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
16764 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
16766 /* Compute offset from the end of the jmp to the target function.
16767 In the case in which the trampoline stores the static chain on
16768 the stack, we need to skip the first insn which pushes the
16769 (call-saved) register static chain; this push is 1 byte. */
16771 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
16772 plus_constant (Pmode
, XEXP (m_tramp
, 0),
16773 offset
- (MEM_P (chain
) ? 1 : 0)),
16774 NULL_RTX
, 1, OPTAB_DIRECT
);
16775 emit_move_insn (mem
, disp
);
16778 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
16780 #ifdef HAVE_ENABLE_EXECUTE_STACK
16781 #ifdef CHECK_EXECUTE_STACK_ENABLED
16782 if (CHECK_EXECUTE_STACK_ENABLED
)
16784 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
16785 LCT_NORMAL
, VOIDmode
, XEXP (m_tramp
, 0), Pmode
);
16790 ix86_allocate_stack_slots_for_args (void)
16792 /* Naked functions should not allocate stack slots for arguments. */
16793 return !ix86_function_naked (current_function_decl
);
16797 ix86_warn_func_return (tree decl
)
16799 /* Naked functions are implemented entirely in assembly, including the
16800 return sequence, so suppress warnings about this. */
16801 return !ix86_function_naked (decl
);
16804 /* Return the shift count of a vector by scalar shift builtin second argument
16807 ix86_vector_shift_count (tree arg1
)
16809 if (tree_fits_uhwi_p (arg1
))
16811 else if (TREE_CODE (arg1
) == VECTOR_CST
&& CHAR_BIT
== 8)
16813 /* The count argument is weird, passed in as various 128-bit
16814 (or 64-bit) vectors, the low 64 bits from it are the count. */
16815 unsigned char buf
[16];
16816 int len
= native_encode_expr (arg1
, buf
, 16);
16819 tree t
= native_interpret_expr (uint64_type_node
, buf
, len
);
16820 if (t
&& tree_fits_uhwi_p (t
))
16827 ix86_fold_builtin (tree fndecl
, int n_args
,
16828 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
16830 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
16832 enum ix86_builtins fn_code
= (enum ix86_builtins
)
16833 DECL_FUNCTION_CODE (fndecl
);
16834 enum rtx_code rcode
;
16836 unsigned HOST_WIDE_INT mask
;
16840 case IX86_BUILTIN_CPU_IS
:
16841 case IX86_BUILTIN_CPU_SUPPORTS
:
16842 gcc_assert (n_args
== 1);
16843 return fold_builtin_cpu (fndecl
, args
);
16845 case IX86_BUILTIN_NANQ
:
16846 case IX86_BUILTIN_NANSQ
:
16848 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
16849 const char *str
= c_getstr (*args
);
16850 int quiet
= fn_code
== IX86_BUILTIN_NANQ
;
16851 REAL_VALUE_TYPE real
;
16853 if (str
&& real_nan (&real
, str
, quiet
, TYPE_MODE (type
)))
16854 return build_real (type
, real
);
16858 case IX86_BUILTIN_INFQ
:
16859 case IX86_BUILTIN_HUGE_VALQ
:
16861 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
16862 REAL_VALUE_TYPE inf
;
16864 return build_real (type
, inf
);
16867 case IX86_BUILTIN_TZCNT16
:
16868 case IX86_BUILTIN_CTZS
:
16869 case IX86_BUILTIN_TZCNT32
:
16870 case IX86_BUILTIN_TZCNT64
:
16871 gcc_assert (n_args
== 1);
16872 if (TREE_CODE (args
[0]) == INTEGER_CST
)
16874 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
16875 tree arg
= args
[0];
16876 if (fn_code
== IX86_BUILTIN_TZCNT16
16877 || fn_code
== IX86_BUILTIN_CTZS
)
16878 arg
= fold_convert (short_unsigned_type_node
, arg
);
16879 if (integer_zerop (arg
))
16880 return build_int_cst (type
, TYPE_PRECISION (TREE_TYPE (arg
)));
16882 return fold_const_call (CFN_CTZ
, type
, arg
);
16886 case IX86_BUILTIN_LZCNT16
:
16887 case IX86_BUILTIN_CLZS
:
16888 case IX86_BUILTIN_LZCNT32
:
16889 case IX86_BUILTIN_LZCNT64
:
16890 gcc_assert (n_args
== 1);
16891 if (TREE_CODE (args
[0]) == INTEGER_CST
)
16893 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
16894 tree arg
= args
[0];
16895 if (fn_code
== IX86_BUILTIN_LZCNT16
16896 || fn_code
== IX86_BUILTIN_CLZS
)
16897 arg
= fold_convert (short_unsigned_type_node
, arg
);
16898 if (integer_zerop (arg
))
16899 return build_int_cst (type
, TYPE_PRECISION (TREE_TYPE (arg
)));
16901 return fold_const_call (CFN_CLZ
, type
, arg
);
16905 case IX86_BUILTIN_BEXTR32
:
16906 case IX86_BUILTIN_BEXTR64
:
16907 case IX86_BUILTIN_BEXTRI32
:
16908 case IX86_BUILTIN_BEXTRI64
:
16909 gcc_assert (n_args
== 2);
16910 if (tree_fits_uhwi_p (args
[1]))
16912 unsigned HOST_WIDE_INT res
= 0;
16913 unsigned int prec
= TYPE_PRECISION (TREE_TYPE (args
[0]));
16914 unsigned int start
= tree_to_uhwi (args
[1]);
16915 unsigned int len
= (start
& 0xff00) >> 8;
16917 if (start
>= prec
|| len
== 0)
16919 else if (!tree_fits_uhwi_p (args
[0]))
16922 res
= tree_to_uhwi (args
[0]) >> start
;
16925 if (len
< HOST_BITS_PER_WIDE_INT
)
16926 res
&= (HOST_WIDE_INT_1U
<< len
) - 1;
16927 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl
)), res
);
16931 case IX86_BUILTIN_BZHI32
:
16932 case IX86_BUILTIN_BZHI64
:
16933 gcc_assert (n_args
== 2);
16934 if (tree_fits_uhwi_p (args
[1]))
16936 unsigned int idx
= tree_to_uhwi (args
[1]) & 0xff;
16937 if (idx
>= TYPE_PRECISION (TREE_TYPE (args
[0])))
16940 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl
)), 0);
16941 if (!tree_fits_uhwi_p (args
[0]))
16943 unsigned HOST_WIDE_INT res
= tree_to_uhwi (args
[0]);
16944 res
&= ~(HOST_WIDE_INT_M1U
<< idx
);
16945 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl
)), res
);
16949 case IX86_BUILTIN_PDEP32
:
16950 case IX86_BUILTIN_PDEP64
:
16951 gcc_assert (n_args
== 2);
16952 if (tree_fits_uhwi_p (args
[0]) && tree_fits_uhwi_p (args
[1]))
16954 unsigned HOST_WIDE_INT src
= tree_to_uhwi (args
[0]);
16955 unsigned HOST_WIDE_INT mask
= tree_to_uhwi (args
[1]);
16956 unsigned HOST_WIDE_INT res
= 0;
16957 unsigned HOST_WIDE_INT m
, k
= 1;
16958 for (m
= 1; m
; m
<<= 1)
16959 if ((mask
& m
) != 0)
16961 if ((src
& k
) != 0)
16965 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl
)), res
);
16969 case IX86_BUILTIN_PEXT32
:
16970 case IX86_BUILTIN_PEXT64
:
16971 gcc_assert (n_args
== 2);
16972 if (tree_fits_uhwi_p (args
[0]) && tree_fits_uhwi_p (args
[1]))
16974 unsigned HOST_WIDE_INT src
= tree_to_uhwi (args
[0]);
16975 unsigned HOST_WIDE_INT mask
= tree_to_uhwi (args
[1]);
16976 unsigned HOST_WIDE_INT res
= 0;
16977 unsigned HOST_WIDE_INT m
, k
= 1;
16978 for (m
= 1; m
; m
<<= 1)
16979 if ((mask
& m
) != 0)
16981 if ((src
& m
) != 0)
16985 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl
)), res
);
16989 case IX86_BUILTIN_MOVMSKPS
:
16990 case IX86_BUILTIN_PMOVMSKB
:
16991 case IX86_BUILTIN_MOVMSKPD
:
16992 case IX86_BUILTIN_PMOVMSKB128
:
16993 case IX86_BUILTIN_MOVMSKPD256
:
16994 case IX86_BUILTIN_MOVMSKPS256
:
16995 case IX86_BUILTIN_PMOVMSKB256
:
16996 gcc_assert (n_args
== 1);
16997 if (TREE_CODE (args
[0]) == VECTOR_CST
)
16999 HOST_WIDE_INT res
= 0;
17000 for (unsigned i
= 0; i
< VECTOR_CST_NELTS (args
[0]); ++i
)
17002 tree e
= VECTOR_CST_ELT (args
[0], i
);
17003 if (TREE_CODE (e
) == INTEGER_CST
&& !TREE_OVERFLOW (e
))
17005 if (wi::neg_p (wi::to_wide (e
)))
17006 res
|= HOST_WIDE_INT_1
<< i
;
17008 else if (TREE_CODE (e
) == REAL_CST
&& !TREE_OVERFLOW (e
))
17010 if (TREE_REAL_CST (e
).sign
)
17011 res
|= HOST_WIDE_INT_1
<< i
;
17016 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl
)), res
);
17020 case IX86_BUILTIN_PSLLD
:
17021 case IX86_BUILTIN_PSLLD128
:
17022 case IX86_BUILTIN_PSLLD128_MASK
:
17023 case IX86_BUILTIN_PSLLD256
:
17024 case IX86_BUILTIN_PSLLD256_MASK
:
17025 case IX86_BUILTIN_PSLLD512
:
17026 case IX86_BUILTIN_PSLLDI
:
17027 case IX86_BUILTIN_PSLLDI128
:
17028 case IX86_BUILTIN_PSLLDI128_MASK
:
17029 case IX86_BUILTIN_PSLLDI256
:
17030 case IX86_BUILTIN_PSLLDI256_MASK
:
17031 case IX86_BUILTIN_PSLLDI512
:
17032 case IX86_BUILTIN_PSLLQ
:
17033 case IX86_BUILTIN_PSLLQ128
:
17034 case IX86_BUILTIN_PSLLQ128_MASK
:
17035 case IX86_BUILTIN_PSLLQ256
:
17036 case IX86_BUILTIN_PSLLQ256_MASK
:
17037 case IX86_BUILTIN_PSLLQ512
:
17038 case IX86_BUILTIN_PSLLQI
:
17039 case IX86_BUILTIN_PSLLQI128
:
17040 case IX86_BUILTIN_PSLLQI128_MASK
:
17041 case IX86_BUILTIN_PSLLQI256
:
17042 case IX86_BUILTIN_PSLLQI256_MASK
:
17043 case IX86_BUILTIN_PSLLQI512
:
17044 case IX86_BUILTIN_PSLLW
:
17045 case IX86_BUILTIN_PSLLW128
:
17046 case IX86_BUILTIN_PSLLW128_MASK
:
17047 case IX86_BUILTIN_PSLLW256
:
17048 case IX86_BUILTIN_PSLLW256_MASK
:
17049 case IX86_BUILTIN_PSLLW512_MASK
:
17050 case IX86_BUILTIN_PSLLWI
:
17051 case IX86_BUILTIN_PSLLWI128
:
17052 case IX86_BUILTIN_PSLLWI128_MASK
:
17053 case IX86_BUILTIN_PSLLWI256
:
17054 case IX86_BUILTIN_PSLLWI256_MASK
:
17055 case IX86_BUILTIN_PSLLWI512_MASK
:
17059 case IX86_BUILTIN_PSRAD
:
17060 case IX86_BUILTIN_PSRAD128
:
17061 case IX86_BUILTIN_PSRAD128_MASK
:
17062 case IX86_BUILTIN_PSRAD256
:
17063 case IX86_BUILTIN_PSRAD256_MASK
:
17064 case IX86_BUILTIN_PSRAD512
:
17065 case IX86_BUILTIN_PSRADI
:
17066 case IX86_BUILTIN_PSRADI128
:
17067 case IX86_BUILTIN_PSRADI128_MASK
:
17068 case IX86_BUILTIN_PSRADI256
:
17069 case IX86_BUILTIN_PSRADI256_MASK
:
17070 case IX86_BUILTIN_PSRADI512
:
17071 case IX86_BUILTIN_PSRAQ128_MASK
:
17072 case IX86_BUILTIN_PSRAQ256_MASK
:
17073 case IX86_BUILTIN_PSRAQ512
:
17074 case IX86_BUILTIN_PSRAQI128_MASK
:
17075 case IX86_BUILTIN_PSRAQI256_MASK
:
17076 case IX86_BUILTIN_PSRAQI512
:
17077 case IX86_BUILTIN_PSRAW
:
17078 case IX86_BUILTIN_PSRAW128
:
17079 case IX86_BUILTIN_PSRAW128_MASK
:
17080 case IX86_BUILTIN_PSRAW256
:
17081 case IX86_BUILTIN_PSRAW256_MASK
:
17082 case IX86_BUILTIN_PSRAW512
:
17083 case IX86_BUILTIN_PSRAWI
:
17084 case IX86_BUILTIN_PSRAWI128
:
17085 case IX86_BUILTIN_PSRAWI128_MASK
:
17086 case IX86_BUILTIN_PSRAWI256
:
17087 case IX86_BUILTIN_PSRAWI256_MASK
:
17088 case IX86_BUILTIN_PSRAWI512
:
17092 case IX86_BUILTIN_PSRLD
:
17093 case IX86_BUILTIN_PSRLD128
:
17094 case IX86_BUILTIN_PSRLD128_MASK
:
17095 case IX86_BUILTIN_PSRLD256
:
17096 case IX86_BUILTIN_PSRLD256_MASK
:
17097 case IX86_BUILTIN_PSRLD512
:
17098 case IX86_BUILTIN_PSRLDI
:
17099 case IX86_BUILTIN_PSRLDI128
:
17100 case IX86_BUILTIN_PSRLDI128_MASK
:
17101 case IX86_BUILTIN_PSRLDI256
:
17102 case IX86_BUILTIN_PSRLDI256_MASK
:
17103 case IX86_BUILTIN_PSRLDI512
:
17104 case IX86_BUILTIN_PSRLQ
:
17105 case IX86_BUILTIN_PSRLQ128
:
17106 case IX86_BUILTIN_PSRLQ128_MASK
:
17107 case IX86_BUILTIN_PSRLQ256
:
17108 case IX86_BUILTIN_PSRLQ256_MASK
:
17109 case IX86_BUILTIN_PSRLQ512
:
17110 case IX86_BUILTIN_PSRLQI
:
17111 case IX86_BUILTIN_PSRLQI128
:
17112 case IX86_BUILTIN_PSRLQI128_MASK
:
17113 case IX86_BUILTIN_PSRLQI256
:
17114 case IX86_BUILTIN_PSRLQI256_MASK
:
17115 case IX86_BUILTIN_PSRLQI512
:
17116 case IX86_BUILTIN_PSRLW
:
17117 case IX86_BUILTIN_PSRLW128
:
17118 case IX86_BUILTIN_PSRLW128_MASK
:
17119 case IX86_BUILTIN_PSRLW256
:
17120 case IX86_BUILTIN_PSRLW256_MASK
:
17121 case IX86_BUILTIN_PSRLW512
:
17122 case IX86_BUILTIN_PSRLWI
:
17123 case IX86_BUILTIN_PSRLWI128
:
17124 case IX86_BUILTIN_PSRLWI128_MASK
:
17125 case IX86_BUILTIN_PSRLWI256
:
17126 case IX86_BUILTIN_PSRLWI256_MASK
:
17127 case IX86_BUILTIN_PSRLWI512
:
17131 case IX86_BUILTIN_PSLLVV16HI
:
17132 case IX86_BUILTIN_PSLLVV16SI
:
17133 case IX86_BUILTIN_PSLLVV2DI
:
17134 case IX86_BUILTIN_PSLLVV2DI_MASK
:
17135 case IX86_BUILTIN_PSLLVV32HI
:
17136 case IX86_BUILTIN_PSLLVV4DI
:
17137 case IX86_BUILTIN_PSLLVV4DI_MASK
:
17138 case IX86_BUILTIN_PSLLVV4SI
:
17139 case IX86_BUILTIN_PSLLVV4SI_MASK
:
17140 case IX86_BUILTIN_PSLLVV8DI
:
17141 case IX86_BUILTIN_PSLLVV8HI
:
17142 case IX86_BUILTIN_PSLLVV8SI
:
17143 case IX86_BUILTIN_PSLLVV8SI_MASK
:
17147 case IX86_BUILTIN_PSRAVQ128
:
17148 case IX86_BUILTIN_PSRAVQ256
:
17149 case IX86_BUILTIN_PSRAVV16HI
:
17150 case IX86_BUILTIN_PSRAVV16SI
:
17151 case IX86_BUILTIN_PSRAVV32HI
:
17152 case IX86_BUILTIN_PSRAVV4SI
:
17153 case IX86_BUILTIN_PSRAVV4SI_MASK
:
17154 case IX86_BUILTIN_PSRAVV8DI
:
17155 case IX86_BUILTIN_PSRAVV8HI
:
17156 case IX86_BUILTIN_PSRAVV8SI
:
17157 case IX86_BUILTIN_PSRAVV8SI_MASK
:
17161 case IX86_BUILTIN_PSRLVV16HI
:
17162 case IX86_BUILTIN_PSRLVV16SI
:
17163 case IX86_BUILTIN_PSRLVV2DI
:
17164 case IX86_BUILTIN_PSRLVV2DI_MASK
:
17165 case IX86_BUILTIN_PSRLVV32HI
:
17166 case IX86_BUILTIN_PSRLVV4DI
:
17167 case IX86_BUILTIN_PSRLVV4DI_MASK
:
17168 case IX86_BUILTIN_PSRLVV4SI
:
17169 case IX86_BUILTIN_PSRLVV4SI_MASK
:
17170 case IX86_BUILTIN_PSRLVV8DI
:
17171 case IX86_BUILTIN_PSRLVV8HI
:
17172 case IX86_BUILTIN_PSRLVV8SI
:
17173 case IX86_BUILTIN_PSRLVV8SI_MASK
:
17179 gcc_assert (n_args
>= 2);
17180 if (TREE_CODE (args
[0]) != VECTOR_CST
)
17182 mask
= HOST_WIDE_INT_M1U
;
17185 /* This is masked shift. */
17186 if (!tree_fits_uhwi_p (args
[n_args
- 1])
17187 || TREE_SIDE_EFFECTS (args
[n_args
- 2]))
17189 mask
= tree_to_uhwi (args
[n_args
- 1]);
17190 unsigned elems
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (args
[0]));
17191 mask
|= HOST_WIDE_INT_M1U
<< elems
;
17192 if (mask
!= HOST_WIDE_INT_M1U
17193 && TREE_CODE (args
[n_args
- 2]) != VECTOR_CST
)
17195 if (mask
== (HOST_WIDE_INT_M1U
<< elems
))
17196 return args
[n_args
- 2];
17198 if (is_vshift
&& TREE_CODE (args
[1]) != VECTOR_CST
)
17200 if (tree tem
= (is_vshift
? integer_one_node
17201 : ix86_vector_shift_count (args
[1])))
17203 unsigned HOST_WIDE_INT count
= tree_to_uhwi (tem
);
17204 unsigned HOST_WIDE_INT prec
17205 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args
[0])));
17206 if (count
== 0 && mask
== HOST_WIDE_INT_M1U
)
17210 if (rcode
== ASHIFTRT
)
17212 else if (mask
== HOST_WIDE_INT_M1U
)
17213 return build_zero_cst (TREE_TYPE (args
[0]));
17215 tree countt
= NULL_TREE
;
17219 countt
= integer_zero_node
;
17221 countt
= build_int_cst (integer_type_node
, count
);
17223 tree_vector_builder builder
;
17224 builder
.new_unary_operation (TREE_TYPE (args
[0]), args
[0],
17226 unsigned int cnt
= builder
.encoded_nelts ();
17227 for (unsigned int i
= 0; i
< cnt
; ++i
)
17229 tree elt
= VECTOR_CST_ELT (args
[0], i
);
17230 if (TREE_CODE (elt
) != INTEGER_CST
|| TREE_OVERFLOW (elt
))
17232 tree type
= TREE_TYPE (elt
);
17233 if (rcode
== LSHIFTRT
)
17234 elt
= fold_convert (unsigned_type_for (type
), elt
);
17237 countt
= VECTOR_CST_ELT (args
[1], i
);
17238 if (TREE_CODE (countt
) != INTEGER_CST
17239 || TREE_OVERFLOW (countt
))
17241 if (wi::neg_p (wi::to_wide (countt
))
17242 || wi::to_widest (countt
) >= prec
)
17244 if (rcode
== ASHIFTRT
)
17245 countt
= build_int_cst (TREE_TYPE (countt
),
17249 elt
= build_zero_cst (TREE_TYPE (elt
));
17250 countt
= build_zero_cst (TREE_TYPE (countt
));
17254 else if (count
>= prec
)
17255 elt
= build_zero_cst (TREE_TYPE (elt
));
17256 elt
= const_binop (rcode
== ASHIFT
17257 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
17258 TREE_TYPE (elt
), elt
, countt
);
17259 if (!elt
|| TREE_CODE (elt
) != INTEGER_CST
)
17261 if (rcode
== LSHIFTRT
)
17262 elt
= fold_convert (type
, elt
);
17263 if ((mask
& (HOST_WIDE_INT_1U
<< i
)) == 0)
17265 elt
= VECTOR_CST_ELT (args
[n_args
- 2], i
);
17266 if (TREE_CODE (elt
) != INTEGER_CST
17267 || TREE_OVERFLOW (elt
))
17270 builder
.quick_push (elt
);
17272 return builder
.build ();
17281 #ifdef SUBTARGET_FOLD_BUILTIN
17282 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
17288 /* Fold a MD builtin (use ix86_fold_builtin for folding into
17289 constant) in GIMPLE. */
17292 ix86_gimple_fold_builtin (gimple_stmt_iterator
*gsi
)
17294 gimple
*stmt
= gsi_stmt (*gsi
);
17295 tree fndecl
= gimple_call_fndecl (stmt
);
17296 gcc_checking_assert (fndecl
&& fndecl_built_in_p (fndecl
, BUILT_IN_MD
));
17297 int n_args
= gimple_call_num_args (stmt
);
17298 enum ix86_builtins fn_code
= (enum ix86_builtins
) DECL_FUNCTION_CODE (fndecl
);
17299 tree decl
= NULL_TREE
;
17300 tree arg0
, arg1
, arg2
;
17301 enum rtx_code rcode
;
17302 unsigned HOST_WIDE_INT count
;
17307 case IX86_BUILTIN_TZCNT32
:
17308 decl
= builtin_decl_implicit (BUILT_IN_CTZ
);
17309 goto fold_tzcnt_lzcnt
;
17311 case IX86_BUILTIN_TZCNT64
:
17312 decl
= builtin_decl_implicit (BUILT_IN_CTZLL
);
17313 goto fold_tzcnt_lzcnt
;
17315 case IX86_BUILTIN_LZCNT32
:
17316 decl
= builtin_decl_implicit (BUILT_IN_CLZ
);
17317 goto fold_tzcnt_lzcnt
;
17319 case IX86_BUILTIN_LZCNT64
:
17320 decl
= builtin_decl_implicit (BUILT_IN_CLZLL
);
17321 goto fold_tzcnt_lzcnt
;
17324 gcc_assert (n_args
== 1);
17325 arg0
= gimple_call_arg (stmt
, 0);
17326 if (TREE_CODE (arg0
) == SSA_NAME
&& decl
&& gimple_call_lhs (stmt
))
17328 int prec
= TYPE_PRECISION (TREE_TYPE (arg0
));
17329 /* If arg0 is provably non-zero, optimize into generic
17330 __builtin_c[tl]z{,ll} function the middle-end handles
17332 if (!expr_not_equal_to (arg0
, wi::zero (prec
)))
17335 location_t loc
= gimple_location (stmt
);
17336 gimple
*g
= gimple_build_call (decl
, 1, arg0
);
17337 gimple_set_location (g
, loc
);
17338 tree lhs
= make_ssa_name (integer_type_node
);
17339 gimple_call_set_lhs (g
, lhs
);
17340 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
17341 g
= gimple_build_assign (gimple_call_lhs (stmt
), NOP_EXPR
, lhs
);
17342 gimple_set_location (g
, loc
);
17343 gsi_replace (gsi
, g
, false);
17348 case IX86_BUILTIN_BZHI32
:
17349 case IX86_BUILTIN_BZHI64
:
17350 gcc_assert (n_args
== 2);
17351 arg1
= gimple_call_arg (stmt
, 1);
17352 if (tree_fits_uhwi_p (arg1
) && gimple_call_lhs (stmt
))
17354 unsigned int idx
= tree_to_uhwi (arg1
) & 0xff;
17355 arg0
= gimple_call_arg (stmt
, 0);
17356 if (idx
< TYPE_PRECISION (TREE_TYPE (arg0
)))
17358 location_t loc
= gimple_location (stmt
);
17359 gimple
*g
= gimple_build_assign (gimple_call_lhs (stmt
), arg0
);
17360 gimple_set_location (g
, loc
);
17361 gsi_replace (gsi
, g
, false);
17366 case IX86_BUILTIN_PDEP32
:
17367 case IX86_BUILTIN_PDEP64
:
17368 case IX86_BUILTIN_PEXT32
:
17369 case IX86_BUILTIN_PEXT64
:
17370 gcc_assert (n_args
== 2);
17371 arg1
= gimple_call_arg (stmt
, 1);
17372 if (integer_all_onesp (arg1
) && gimple_call_lhs (stmt
))
17374 location_t loc
= gimple_location (stmt
);
17375 arg0
= gimple_call_arg (stmt
, 0);
17376 gimple
*g
= gimple_build_assign (gimple_call_lhs (stmt
), arg0
);
17377 gimple_set_location (g
, loc
);
17378 gsi_replace (gsi
, g
, false);
17383 case IX86_BUILTIN_PSLLD
:
17384 case IX86_BUILTIN_PSLLD128
:
17385 case IX86_BUILTIN_PSLLD128_MASK
:
17386 case IX86_BUILTIN_PSLLD256
:
17387 case IX86_BUILTIN_PSLLD256_MASK
:
17388 case IX86_BUILTIN_PSLLD512
:
17389 case IX86_BUILTIN_PSLLDI
:
17390 case IX86_BUILTIN_PSLLDI128
:
17391 case IX86_BUILTIN_PSLLDI128_MASK
:
17392 case IX86_BUILTIN_PSLLDI256
:
17393 case IX86_BUILTIN_PSLLDI256_MASK
:
17394 case IX86_BUILTIN_PSLLDI512
:
17395 case IX86_BUILTIN_PSLLQ
:
17396 case IX86_BUILTIN_PSLLQ128
:
17397 case IX86_BUILTIN_PSLLQ128_MASK
:
17398 case IX86_BUILTIN_PSLLQ256
:
17399 case IX86_BUILTIN_PSLLQ256_MASK
:
17400 case IX86_BUILTIN_PSLLQ512
:
17401 case IX86_BUILTIN_PSLLQI
:
17402 case IX86_BUILTIN_PSLLQI128
:
17403 case IX86_BUILTIN_PSLLQI128_MASK
:
17404 case IX86_BUILTIN_PSLLQI256
:
17405 case IX86_BUILTIN_PSLLQI256_MASK
:
17406 case IX86_BUILTIN_PSLLQI512
:
17407 case IX86_BUILTIN_PSLLW
:
17408 case IX86_BUILTIN_PSLLW128
:
17409 case IX86_BUILTIN_PSLLW128_MASK
:
17410 case IX86_BUILTIN_PSLLW256
:
17411 case IX86_BUILTIN_PSLLW256_MASK
:
17412 case IX86_BUILTIN_PSLLW512_MASK
:
17413 case IX86_BUILTIN_PSLLWI
:
17414 case IX86_BUILTIN_PSLLWI128
:
17415 case IX86_BUILTIN_PSLLWI128_MASK
:
17416 case IX86_BUILTIN_PSLLWI256
:
17417 case IX86_BUILTIN_PSLLWI256_MASK
:
17418 case IX86_BUILTIN_PSLLWI512_MASK
:
17422 case IX86_BUILTIN_PSRAD
:
17423 case IX86_BUILTIN_PSRAD128
:
17424 case IX86_BUILTIN_PSRAD128_MASK
:
17425 case IX86_BUILTIN_PSRAD256
:
17426 case IX86_BUILTIN_PSRAD256_MASK
:
17427 case IX86_BUILTIN_PSRAD512
:
17428 case IX86_BUILTIN_PSRADI
:
17429 case IX86_BUILTIN_PSRADI128
:
17430 case IX86_BUILTIN_PSRADI128_MASK
:
17431 case IX86_BUILTIN_PSRADI256
:
17432 case IX86_BUILTIN_PSRADI256_MASK
:
17433 case IX86_BUILTIN_PSRADI512
:
17434 case IX86_BUILTIN_PSRAQ128_MASK
:
17435 case IX86_BUILTIN_PSRAQ256_MASK
:
17436 case IX86_BUILTIN_PSRAQ512
:
17437 case IX86_BUILTIN_PSRAQI128_MASK
:
17438 case IX86_BUILTIN_PSRAQI256_MASK
:
17439 case IX86_BUILTIN_PSRAQI512
:
17440 case IX86_BUILTIN_PSRAW
:
17441 case IX86_BUILTIN_PSRAW128
:
17442 case IX86_BUILTIN_PSRAW128_MASK
:
17443 case IX86_BUILTIN_PSRAW256
:
17444 case IX86_BUILTIN_PSRAW256_MASK
:
17445 case IX86_BUILTIN_PSRAW512
:
17446 case IX86_BUILTIN_PSRAWI
:
17447 case IX86_BUILTIN_PSRAWI128
:
17448 case IX86_BUILTIN_PSRAWI128_MASK
:
17449 case IX86_BUILTIN_PSRAWI256
:
17450 case IX86_BUILTIN_PSRAWI256_MASK
:
17451 case IX86_BUILTIN_PSRAWI512
:
17455 case IX86_BUILTIN_PSRLD
:
17456 case IX86_BUILTIN_PSRLD128
:
17457 case IX86_BUILTIN_PSRLD128_MASK
:
17458 case IX86_BUILTIN_PSRLD256
:
17459 case IX86_BUILTIN_PSRLD256_MASK
:
17460 case IX86_BUILTIN_PSRLD512
:
17461 case IX86_BUILTIN_PSRLDI
:
17462 case IX86_BUILTIN_PSRLDI128
:
17463 case IX86_BUILTIN_PSRLDI128_MASK
:
17464 case IX86_BUILTIN_PSRLDI256
:
17465 case IX86_BUILTIN_PSRLDI256_MASK
:
17466 case IX86_BUILTIN_PSRLDI512
:
17467 case IX86_BUILTIN_PSRLQ
:
17468 case IX86_BUILTIN_PSRLQ128
:
17469 case IX86_BUILTIN_PSRLQ128_MASK
:
17470 case IX86_BUILTIN_PSRLQ256
:
17471 case IX86_BUILTIN_PSRLQ256_MASK
:
17472 case IX86_BUILTIN_PSRLQ512
:
17473 case IX86_BUILTIN_PSRLQI
:
17474 case IX86_BUILTIN_PSRLQI128
:
17475 case IX86_BUILTIN_PSRLQI128_MASK
:
17476 case IX86_BUILTIN_PSRLQI256
:
17477 case IX86_BUILTIN_PSRLQI256_MASK
:
17478 case IX86_BUILTIN_PSRLQI512
:
17479 case IX86_BUILTIN_PSRLW
:
17480 case IX86_BUILTIN_PSRLW128
:
17481 case IX86_BUILTIN_PSRLW128_MASK
:
17482 case IX86_BUILTIN_PSRLW256
:
17483 case IX86_BUILTIN_PSRLW256_MASK
:
17484 case IX86_BUILTIN_PSRLW512
:
17485 case IX86_BUILTIN_PSRLWI
:
17486 case IX86_BUILTIN_PSRLWI128
:
17487 case IX86_BUILTIN_PSRLWI128_MASK
:
17488 case IX86_BUILTIN_PSRLWI256
:
17489 case IX86_BUILTIN_PSRLWI256_MASK
:
17490 case IX86_BUILTIN_PSRLWI512
:
17494 case IX86_BUILTIN_PSLLVV16HI
:
17495 case IX86_BUILTIN_PSLLVV16SI
:
17496 case IX86_BUILTIN_PSLLVV2DI
:
17497 case IX86_BUILTIN_PSLLVV2DI_MASK
:
17498 case IX86_BUILTIN_PSLLVV32HI
:
17499 case IX86_BUILTIN_PSLLVV4DI
:
17500 case IX86_BUILTIN_PSLLVV4DI_MASK
:
17501 case IX86_BUILTIN_PSLLVV4SI
:
17502 case IX86_BUILTIN_PSLLVV4SI_MASK
:
17503 case IX86_BUILTIN_PSLLVV8DI
:
17504 case IX86_BUILTIN_PSLLVV8HI
:
17505 case IX86_BUILTIN_PSLLVV8SI
:
17506 case IX86_BUILTIN_PSLLVV8SI_MASK
:
17510 case IX86_BUILTIN_PSRAVQ128
:
17511 case IX86_BUILTIN_PSRAVQ256
:
17512 case IX86_BUILTIN_PSRAVV16HI
:
17513 case IX86_BUILTIN_PSRAVV16SI
:
17514 case IX86_BUILTIN_PSRAVV32HI
:
17515 case IX86_BUILTIN_PSRAVV4SI
:
17516 case IX86_BUILTIN_PSRAVV4SI_MASK
:
17517 case IX86_BUILTIN_PSRAVV8DI
:
17518 case IX86_BUILTIN_PSRAVV8HI
:
17519 case IX86_BUILTIN_PSRAVV8SI
:
17520 case IX86_BUILTIN_PSRAVV8SI_MASK
:
17524 case IX86_BUILTIN_PSRLVV16HI
:
17525 case IX86_BUILTIN_PSRLVV16SI
:
17526 case IX86_BUILTIN_PSRLVV2DI
:
17527 case IX86_BUILTIN_PSRLVV2DI_MASK
:
17528 case IX86_BUILTIN_PSRLVV32HI
:
17529 case IX86_BUILTIN_PSRLVV4DI
:
17530 case IX86_BUILTIN_PSRLVV4DI_MASK
:
17531 case IX86_BUILTIN_PSRLVV4SI
:
17532 case IX86_BUILTIN_PSRLVV4SI_MASK
:
17533 case IX86_BUILTIN_PSRLVV8DI
:
17534 case IX86_BUILTIN_PSRLVV8HI
:
17535 case IX86_BUILTIN_PSRLVV8SI
:
17536 case IX86_BUILTIN_PSRLVV8SI_MASK
:
17542 gcc_assert (n_args
>= 2);
17543 arg0
= gimple_call_arg (stmt
, 0);
17544 arg1
= gimple_call_arg (stmt
, 1);
17547 /* This is masked shift. Only optimize if the mask is all ones. */
17548 tree argl
= gimple_call_arg (stmt
, n_args
- 1);
17549 if (!tree_fits_uhwi_p (argl
))
17551 unsigned HOST_WIDE_INT mask
= tree_to_uhwi (argl
);
17552 unsigned elems
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0
));
17553 if ((mask
| (HOST_WIDE_INT_M1U
<< elems
)) != HOST_WIDE_INT_M1U
)
17558 if (TREE_CODE (arg1
) != VECTOR_CST
)
17560 count
= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0
)));
17561 if (integer_zerop (arg1
))
17563 else if (rcode
== ASHIFTRT
)
17566 for (unsigned int i
= 0; i
< VECTOR_CST_NELTS (arg1
); ++i
)
17568 tree elt
= VECTOR_CST_ELT (arg1
, i
);
17569 if (!wi::neg_p (wi::to_wide (elt
))
17570 && wi::to_widest (elt
) < count
)
17576 arg1
= ix86_vector_shift_count (arg1
);
17579 count
= tree_to_uhwi (arg1
);
17583 /* Just return the first argument for shift by 0. */
17584 location_t loc
= gimple_location (stmt
);
17585 gimple
*g
= gimple_build_assign (gimple_call_lhs (stmt
), arg0
);
17586 gimple_set_location (g
, loc
);
17587 gsi_replace (gsi
, g
, false);
17590 if (rcode
!= ASHIFTRT
17591 && count
>= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0
))))
17593 /* For shift counts equal or greater than precision, except for
17594 arithmetic right shift the result is zero. */
17595 location_t loc
= gimple_location (stmt
);
17596 gimple
*g
= gimple_build_assign (gimple_call_lhs (stmt
),
17597 build_zero_cst (TREE_TYPE (arg0
)));
17598 gimple_set_location (g
, loc
);
17599 gsi_replace (gsi
, g
, false);
17604 case IX86_BUILTIN_SHUFPD
:
17605 arg2
= gimple_call_arg (stmt
, 2);
17606 if (TREE_CODE (arg2
) == INTEGER_CST
)
17608 location_t loc
= gimple_location (stmt
);
17609 unsigned HOST_WIDE_INT imask
= TREE_INT_CST_LOW (arg2
);
17610 arg0
= gimple_call_arg (stmt
, 0);
17611 arg1
= gimple_call_arg (stmt
, 1);
17612 tree itype
= long_long_integer_type_node
;
17613 tree vtype
= build_vector_type (itype
, 2); /* V2DI */
17614 tree_vector_builder
elts (vtype
, 2, 1);
17615 /* Ignore bits other than the lowest 2. */
17616 elts
.quick_push (build_int_cst (itype
, imask
& 1));
17618 elts
.quick_push (build_int_cst (itype
, 2 + (imask
& 1)));
17619 tree omask
= elts
.build ();
17620 gimple
*g
= gimple_build_assign (gimple_call_lhs (stmt
),
17622 arg0
, arg1
, omask
);
17623 gimple_set_location (g
, loc
);
17624 gsi_replace (gsi
, g
, false);
17627 // Do not error yet, the constant could be propagated later?
17637 /* Handler for an SVML-style interface to
17638 a library with vectorized intrinsics. */
17641 ix86_veclibabi_svml (combined_fn fn
, tree type_out
, tree type_in
)
17644 tree fntype
, new_fndecl
, args
;
17647 machine_mode el_mode
, in_mode
;
17650 /* The SVML is suitable for unsafe math only. */
17651 if (!flag_unsafe_math_optimizations
)
17654 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
17655 n
= TYPE_VECTOR_SUBPARTS (type_out
);
17656 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
17657 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
17658 if (el_mode
!= in_mode
17682 if ((el_mode
!= DFmode
|| n
!= 2)
17683 && (el_mode
!= SFmode
|| n
!= 4))
17691 tree fndecl
= mathfn_built_in (TREE_TYPE (type_in
), fn
);
17692 bname
= IDENTIFIER_POINTER (DECL_NAME (fndecl
));
17694 if (DECL_FUNCTION_CODE (fndecl
) == BUILT_IN_LOGF
)
17695 strcpy (name
, "vmlsLn4");
17696 else if (DECL_FUNCTION_CODE (fndecl
) == BUILT_IN_LOG
)
17697 strcpy (name
, "vmldLn2");
17700 sprintf (name
, "vmls%s", bname
+10);
17701 name
[strlen (name
)-1] = '4';
17704 sprintf (name
, "vmld%s2", bname
+10);
17706 /* Convert to uppercase. */
17710 for (args
= DECL_ARGUMENTS (fndecl
); args
; args
= TREE_CHAIN (args
))
17714 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
17716 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
17718 /* Build a function declaration for the vectorized function. */
17719 new_fndecl
= build_decl (BUILTINS_LOCATION
,
17720 FUNCTION_DECL
, get_identifier (name
), fntype
);
17721 TREE_PUBLIC (new_fndecl
) = 1;
17722 DECL_EXTERNAL (new_fndecl
) = 1;
17723 DECL_IS_NOVOPS (new_fndecl
) = 1;
17724 TREE_READONLY (new_fndecl
) = 1;
17729 /* Handler for an ACML-style interface to
17730 a library with vectorized intrinsics. */
17733 ix86_veclibabi_acml (combined_fn fn
, tree type_out
, tree type_in
)
17735 char name
[20] = "__vr.._";
17736 tree fntype
, new_fndecl
, args
;
17739 machine_mode el_mode
, in_mode
;
17742 /* The ACML is 64bits only and suitable for unsafe math only as
17743 it does not correctly support parts of IEEE with the required
17744 precision such as denormals. */
17746 || !flag_unsafe_math_optimizations
)
17749 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
17750 n
= TYPE_VECTOR_SUBPARTS (type_out
);
17751 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
17752 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
17753 if (el_mode
!= in_mode
17765 if (el_mode
== DFmode
&& n
== 2)
17770 else if (el_mode
== SFmode
&& n
== 4)
17783 tree fndecl
= mathfn_built_in (TREE_TYPE (type_in
), fn
);
17784 bname
= IDENTIFIER_POINTER (DECL_NAME (fndecl
));
17785 sprintf (name
+ 7, "%s", bname
+10);
17788 for (args
= DECL_ARGUMENTS (fndecl
); args
; args
= TREE_CHAIN (args
))
17792 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
17794 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
17796 /* Build a function declaration for the vectorized function. */
17797 new_fndecl
= build_decl (BUILTINS_LOCATION
,
17798 FUNCTION_DECL
, get_identifier (name
), fntype
);
17799 TREE_PUBLIC (new_fndecl
) = 1;
17800 DECL_EXTERNAL (new_fndecl
) = 1;
17801 DECL_IS_NOVOPS (new_fndecl
) = 1;
17802 TREE_READONLY (new_fndecl
) = 1;
17807 /* Returns a decl of a function that implements scatter store with
17808 register type VECTYPE and index type INDEX_TYPE and SCALE.
17809 Return NULL_TREE if it is not available. */
17812 ix86_vectorize_builtin_scatter (const_tree vectype
,
17813 const_tree index_type
, int scale
)
17816 enum ix86_builtins code
;
17818 if (!TARGET_AVX512F
)
17821 if ((TREE_CODE (index_type
) != INTEGER_TYPE
17822 && !POINTER_TYPE_P (index_type
))
17823 || (TYPE_MODE (index_type
) != SImode
17824 && TYPE_MODE (index_type
) != DImode
))
17827 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
17830 /* v*scatter* insn sign extends index to pointer mode. */
17831 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
17832 && TYPE_UNSIGNED (index_type
))
17835 /* Scale can be 1, 2, 4 or 8. */
17838 || (scale
& (scale
- 1)) != 0)
17841 si
= TYPE_MODE (index_type
) == SImode
;
17842 switch (TYPE_MODE (vectype
))
17845 code
= si
? IX86_BUILTIN_SCATTERALTSIV8DF
: IX86_BUILTIN_SCATTERDIV8DF
;
17848 code
= si
? IX86_BUILTIN_SCATTERALTSIV8DI
: IX86_BUILTIN_SCATTERDIV8DI
;
17851 code
= si
? IX86_BUILTIN_SCATTERSIV16SF
: IX86_BUILTIN_SCATTERALTDIV16SF
;
17854 code
= si
? IX86_BUILTIN_SCATTERSIV16SI
: IX86_BUILTIN_SCATTERALTDIV16SI
;
17857 if (TARGET_AVX512VL
)
17858 code
= si
? IX86_BUILTIN_SCATTERALTSIV4DF
: IX86_BUILTIN_SCATTERDIV4DF
;
17863 if (TARGET_AVX512VL
)
17864 code
= si
? IX86_BUILTIN_SCATTERALTSIV4DI
: IX86_BUILTIN_SCATTERDIV4DI
;
17869 if (TARGET_AVX512VL
)
17870 code
= si
? IX86_BUILTIN_SCATTERSIV8SF
: IX86_BUILTIN_SCATTERALTDIV8SF
;
17875 if (TARGET_AVX512VL
)
17876 code
= si
? IX86_BUILTIN_SCATTERSIV8SI
: IX86_BUILTIN_SCATTERALTDIV8SI
;
17881 if (TARGET_AVX512VL
)
17882 code
= si
? IX86_BUILTIN_SCATTERALTSIV2DF
: IX86_BUILTIN_SCATTERDIV2DF
;
17887 if (TARGET_AVX512VL
)
17888 code
= si
? IX86_BUILTIN_SCATTERALTSIV2DI
: IX86_BUILTIN_SCATTERDIV2DI
;
17893 if (TARGET_AVX512VL
)
17894 code
= si
? IX86_BUILTIN_SCATTERSIV4SF
: IX86_BUILTIN_SCATTERALTDIV4SF
;
17899 if (TARGET_AVX512VL
)
17900 code
= si
? IX86_BUILTIN_SCATTERSIV4SI
: IX86_BUILTIN_SCATTERALTDIV4SI
;
17908 return get_ix86_builtin (code
);
17911 /* Return true if it is safe to use the rsqrt optabs to optimize
17917 return (TARGET_SSE
&& TARGET_SSE_MATH
17918 && flag_finite_math_only
17919 && !flag_trapping_math
17920 && flag_unsafe_math_optimizations
);
17923 /* Helper for avx_vpermilps256_operand et al. This is also used by
17924 the expansion functions to turn the parallel back into a mask.
17925 The return value is 0 for no match and the imm8+1 for a match. */
17928 avx_vpermilp_parallel (rtx par
, machine_mode mode
)
17930 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
17932 unsigned char ipar
[16] = {}; /* Silence -Wuninitialized warning. */
17934 if (XVECLEN (par
, 0) != (int) nelt
)
17937 /* Validate that all of the elements are constants, and not totally
17938 out of range. Copy the data into an integral array to make the
17939 subsequent checks easier. */
17940 for (i
= 0; i
< nelt
; ++i
)
17942 rtx er
= XVECEXP (par
, 0, i
);
17943 unsigned HOST_WIDE_INT ei
;
17945 if (!CONST_INT_P (er
))
17956 /* In the 512-bit DFmode case, we can only move elements within
17957 a 128-bit lane. First fill the second part of the mask,
17959 for (i
= 4; i
< 6; ++i
)
17961 if (ipar
[i
] < 4 || ipar
[i
] >= 6)
17963 mask
|= (ipar
[i
] - 4) << i
;
17965 for (i
= 6; i
< 8; ++i
)
17969 mask
|= (ipar
[i
] - 6) << i
;
17974 /* In the 256-bit DFmode case, we can only move elements within
17976 for (i
= 0; i
< 2; ++i
)
17980 mask
|= ipar
[i
] << i
;
17982 for (i
= 2; i
< 4; ++i
)
17986 mask
|= (ipar
[i
] - 2) << i
;
17991 /* In 512 bit SFmode case, permutation in the upper 256 bits
17992 must mirror the permutation in the lower 256-bits. */
17993 for (i
= 0; i
< 8; ++i
)
17994 if (ipar
[i
] + 8 != ipar
[i
+ 8])
17999 /* In 256 bit SFmode case, we have full freedom of
18000 movement within the low 128-bit lane, but the high 128-bit
18001 lane must mirror the exact same pattern. */
18002 for (i
= 0; i
< 4; ++i
)
18003 if (ipar
[i
] + 4 != ipar
[i
+ 4])
18010 /* In the 128-bit case, we've full freedom in the placement of
18011 the elements from the source operand. */
18012 for (i
= 0; i
< nelt
; ++i
)
18013 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
18017 gcc_unreachable ();
18020 /* Make sure success has a non-zero value by adding one. */
18024 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
18025 the expansion functions to turn the parallel back into a mask.
18026 The return value is 0 for no match and the imm8+1 for a match. */
18029 avx_vperm2f128_parallel (rtx par
, machine_mode mode
)
18031 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
18033 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
18035 if (XVECLEN (par
, 0) != (int) nelt
)
18038 /* Validate that all of the elements are constants, and not totally
18039 out of range. Copy the data into an integral array to make the
18040 subsequent checks easier. */
18041 for (i
= 0; i
< nelt
; ++i
)
18043 rtx er
= XVECEXP (par
, 0, i
);
18044 unsigned HOST_WIDE_INT ei
;
18046 if (!CONST_INT_P (er
))
18049 if (ei
>= 2 * nelt
)
18054 /* Validate that the halves of the permute are halves. */
18055 for (i
= 0; i
< nelt2
- 1; ++i
)
18056 if (ipar
[i
] + 1 != ipar
[i
+ 1])
18058 for (i
= nelt2
; i
< nelt
- 1; ++i
)
18059 if (ipar
[i
] + 1 != ipar
[i
+ 1])
18062 /* Reconstruct the mask. */
18063 for (i
= 0; i
< 2; ++i
)
18065 unsigned e
= ipar
[i
* nelt2
];
18069 mask
|= e
<< (i
* 4);
18072 /* Make sure success has a non-zero value by adding one. */
18076 /* Return a register priority for hard reg REGNO. */
18078 ix86_register_priority (int hard_regno
)
18080 /* ebp and r13 as the base always wants a displacement, r12 as the
18081 base always wants an index. So discourage their usage in an
18083 if (hard_regno
== R12_REG
|| hard_regno
== R13_REG
)
18085 if (hard_regno
== BP_REG
)
18087 /* New x86-64 int registers result in bigger code size. Discourage
18089 if (IN_RANGE (hard_regno
, FIRST_REX_INT_REG
, LAST_REX_INT_REG
))
18091 /* New x86-64 SSE registers result in bigger code size. Discourage
18093 if (IN_RANGE (hard_regno
, FIRST_REX_SSE_REG
, LAST_REX_SSE_REG
))
18095 if (IN_RANGE (hard_regno
, FIRST_EXT_REX_SSE_REG
, LAST_EXT_REX_SSE_REG
))
18097 /* Usage of AX register results in smaller code. Prefer it. */
18098 if (hard_regno
== AX_REG
)
18103 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
18105 Put float CONST_DOUBLE in the constant pool instead of fp regs.
18106 QImode must go into class Q_REGS.
18107 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18108 movdf to do mem-to-mem moves through integer regs. */
18111 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
18113 machine_mode mode
= GET_MODE (x
);
18115 /* We're only allowed to return a subclass of CLASS. Many of the
18116 following checks fail for NO_REGS, so eliminate that early. */
18117 if (regclass
== NO_REGS
)
18120 /* All classes can load zeros. */
18121 if (x
== CONST0_RTX (mode
))
18124 /* Force constants into memory if we are loading a (nonzero) constant into
18125 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
18126 instructions to load from a constant. */
18128 && (MAYBE_MMX_CLASS_P (regclass
)
18129 || MAYBE_SSE_CLASS_P (regclass
)
18130 || MAYBE_MASK_CLASS_P (regclass
)))
18133 /* Floating-point constants need more complex checks. */
18134 if (CONST_DOUBLE_P (x
))
18136 /* General regs can load everything. */
18137 if (INTEGER_CLASS_P (regclass
))
18140 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18141 zero above. We only want to wind up preferring 80387 registers if
18142 we plan on doing computation with them. */
18143 if (IS_STACK_MODE (mode
)
18144 && standard_80387_constant_p (x
) > 0)
18146 /* Limit class to FP regs. */
18147 if (FLOAT_CLASS_P (regclass
))
18154 /* Prefer SSE regs only, if we can use them for math. */
18155 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
18156 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
18158 /* Generally when we see PLUS here, it's the function invariant
18159 (plus soft-fp const_int). Which can only be computed into general
18161 if (GET_CODE (x
) == PLUS
)
18162 return INTEGER_CLASS_P (regclass
) ? regclass
: NO_REGS
;
18164 /* QImode constants are easy to load, but non-constant QImode data
18165 must go into Q_REGS. */
18166 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
18168 if (Q_CLASS_P (regclass
))
18170 else if (reg_class_subset_p (Q_REGS
, regclass
))
18179 /* Discourage putting floating-point values in SSE registers unless
18180 SSE math is being used, and likewise for the 387 registers. */
18182 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
18184 machine_mode mode
= GET_MODE (x
);
18186 /* Restrict the output reload class to the register bank that we are doing
18187 math on. If we would like not to return a subset of CLASS, reject this
18188 alternative: if reload cannot do this, it will still use its choice. */
18189 mode
= GET_MODE (x
);
18190 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
18191 return MAYBE_SSE_CLASS_P (regclass
) ? ALL_SSE_REGS
: NO_REGS
;
18193 if (IS_STACK_MODE (mode
))
18194 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
18200 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
18201 machine_mode mode
, secondary_reload_info
*sri
)
18203 /* Double-word spills from general registers to non-offsettable memory
18204 references (zero-extended addresses) require special handling. */
18207 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
18208 && INTEGER_CLASS_P (rclass
)
18209 && !offsettable_memref_p (x
))
18212 ? CODE_FOR_reload_noff_load
18213 : CODE_FOR_reload_noff_store
);
18214 /* Add the cost of moving address to a temporary. */
18215 sri
->extra_cost
= 1;
18220 /* QImode spills from non-QI registers require
18221 intermediate register on 32bit targets. */
18223 && ((!TARGET_64BIT
&& !in_p
18224 && INTEGER_CLASS_P (rclass
)
18225 && MAYBE_NON_Q_CLASS_P (rclass
))
18226 || (!TARGET_AVX512DQ
18227 && MAYBE_MASK_CLASS_P (rclass
))))
18229 int regno
= true_regnum (x
);
18231 /* Return Q_REGS if the operand is in memory. */
18238 /* This condition handles corner case where an expression involving
18239 pointers gets vectorized. We're trying to use the address of a
18240 stack slot as a vector initializer.
18242 (set (reg:V2DI 74 [ vect_cst_.2 ])
18243 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
18245 Eventually frame gets turned into sp+offset like this:
18247 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18248 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18249 (const_int 392 [0x188]))))
18251 That later gets turned into:
18253 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18254 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18255 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
18257 We'll have the following reload recorded:
18259 Reload 0: reload_in (DI) =
18260 (plus:DI (reg/f:DI 7 sp)
18261 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
18262 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18263 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
18264 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
18265 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18266 reload_reg_rtx: (reg:V2DI 22 xmm1)
18268 Which isn't going to work since SSE instructions can't handle scalar
18269 additions. Returning GENERAL_REGS forces the addition into integer
18270 register and reload can handle subsequent reloads without problems. */
18272 if (in_p
&& GET_CODE (x
) == PLUS
18273 && SSE_CLASS_P (rclass
)
18274 && SCALAR_INT_MODE_P (mode
))
18275 return GENERAL_REGS
;
18280 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
18283 ix86_class_likely_spilled_p (reg_class_t rclass
)
18294 case SSE_FIRST_REG
:
18296 case FP_SECOND_REG
:
18306 /* If we are copying between registers from different register sets
18307 (e.g. FP and integer), we may need a memory location.
18309 The function can't work reliably when one of the CLASSES is a class
18310 containing registers from multiple sets. We avoid this by never combining
18311 different sets in a single alternative in the machine description.
18312 Ensure that this constraint holds to avoid unexpected surprises.
18314 When STRICT is false, we are being called from REGISTER_MOVE_COST,
18315 so do not enforce these sanity checks.
18317 To optimize register_move_cost performance, define inline variant. */
18320 inline_secondary_memory_needed (machine_mode mode
, reg_class_t class1
,
18321 reg_class_t class2
, int strict
)
18323 if (lra_in_progress
&& (class1
== NO_REGS
|| class2
== NO_REGS
))
18326 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
18327 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
18328 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
18329 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
18330 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
18331 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
)
18332 || MAYBE_MASK_CLASS_P (class1
) != MASK_CLASS_P (class1
)
18333 || MAYBE_MASK_CLASS_P (class2
) != MASK_CLASS_P (class2
))
18335 gcc_assert (!strict
|| lra_in_progress
);
18339 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
18342 /* Between mask and general, we have moves no larger than word size. */
18343 if ((MASK_CLASS_P (class1
) != MASK_CLASS_P (class2
))
18344 && (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
))
18347 /* ??? This is a lie. We do have moves between mmx/general, and for
18348 mmx/sse2. But by saying we need secondary memory we discourage the
18349 register allocator from using the mmx registers unless needed. */
18350 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
18353 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18355 /* SSE1 doesn't have any direct moves from other classes. */
18359 /* If the target says that inter-unit moves are more expensive
18360 than moving through memory, then don't generate them. */
18361 if ((SSE_CLASS_P (class1
) && !TARGET_INTER_UNIT_MOVES_FROM_VEC
)
18362 || (SSE_CLASS_P (class2
) && !TARGET_INTER_UNIT_MOVES_TO_VEC
))
18365 /* Between SSE and general, we have moves no larger than word size. */
18366 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
18373 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
18376 ix86_secondary_memory_needed (machine_mode mode
, reg_class_t class1
,
18377 reg_class_t class2
)
18379 return inline_secondary_memory_needed (mode
, class1
, class2
, true);
18382 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
18384 get_secondary_mem widens integral modes to BITS_PER_WORD.
18385 There is no need to emit full 64 bit move on 64 bit targets
18386 for integral modes that can be moved using 32 bit move. */
18388 static machine_mode
18389 ix86_secondary_memory_needed_mode (machine_mode mode
)
18391 if (GET_MODE_BITSIZE (mode
) < 32 && INTEGRAL_MODE_P (mode
))
18392 return mode_for_size (32, GET_MODE_CLASS (mode
), 0).require ();
18396 /* Implement the TARGET_CLASS_MAX_NREGS hook.
18398 On the 80386, this is the size of MODE in words,
18399 except in the FP regs, where a single reg is always enough. */
18401 static unsigned char
18402 ix86_class_max_nregs (reg_class_t rclass
, machine_mode mode
)
18404 if (MAYBE_INTEGER_CLASS_P (rclass
))
18406 if (mode
== XFmode
)
18407 return (TARGET_64BIT
? 2 : 3);
18408 else if (mode
== XCmode
)
18409 return (TARGET_64BIT
? 4 : 6);
18411 return CEIL (GET_MODE_SIZE (mode
), UNITS_PER_WORD
);
18415 if (COMPLEX_MODE_P (mode
))
18422 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
18425 ix86_can_change_mode_class (machine_mode from
, machine_mode to
,
18426 reg_class_t regclass
)
18431 /* x87 registers can't do subreg at all, as all values are reformatted
18432 to extended precision. */
18433 if (MAYBE_FLOAT_CLASS_P (regclass
))
18436 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
18438 /* Vector registers do not support QI or HImode loads. If we don't
18439 disallow a change to these modes, reload will assume it's ok to
18440 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18441 the vec_dupv4hi pattern. */
18442 if (GET_MODE_SIZE (from
) < 4)
18449 /* Return index of MODE in the sse load/store tables. */
18452 sse_store_index (machine_mode mode
)
18454 switch (GET_MODE_SIZE (mode
))
18471 /* Return the cost of moving data of mode M between a
18472 register and memory. A value of 2 is the default; this cost is
18473 relative to those in `REGISTER_MOVE_COST'.
18475 This function is used extensively by register_move_cost that is used to
18476 build tables at startup. Make it inline in this case.
18477 When IN is 2, return maximum of in and out move cost.
18479 If moving between registers and memory is more expensive than
18480 between two registers, you should define this macro to express the
18483 Model also increased moving costs of QImode registers in non
18487 inline_memory_move_cost (machine_mode mode
, enum reg_class regclass
, int in
)
18490 if (FLOAT_CLASS_P (regclass
))
18508 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
18509 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
18511 if (SSE_CLASS_P (regclass
))
18513 int index
= sse_store_index (mode
);
18517 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
18518 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
18520 if (MMX_CLASS_P (regclass
))
18523 switch (GET_MODE_SIZE (mode
))
18535 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
18536 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
18538 switch (GET_MODE_SIZE (mode
))
18541 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
18544 return ix86_cost
->int_store
[0];
18545 if (TARGET_PARTIAL_REG_DEPENDENCY
18546 && optimize_function_for_speed_p (cfun
))
18547 cost
= ix86_cost
->movzbl_load
;
18549 cost
= ix86_cost
->int_load
[0];
18551 return MAX (cost
, ix86_cost
->int_store
[0]);
18557 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
18559 return ix86_cost
->movzbl_load
;
18561 return ix86_cost
->int_store
[0] + 4;
18566 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
18567 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
18570 cost
= MAX (ix86_cost
->int_load
[2], ix86_cost
->int_store
[2]);
18572 cost
= ix86_cost
->int_load
[2];
18574 cost
= ix86_cost
->int_store
[2];
18575 /* Multiply with the number of GPR moves needed. */
18576 return cost
* CEIL ((int) GET_MODE_SIZE (mode
), UNITS_PER_WORD
);
18581 ix86_memory_move_cost (machine_mode mode
, reg_class_t regclass
, bool in
)
18583 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
18587 /* Return the cost of moving data from a register in class CLASS1 to
18588 one in class CLASS2.
18590 It is not required that the cost always equal 2 when FROM is the same as TO;
18591 on some machines it is expensive to move between registers if they are not
18592 general registers. */
18595 ix86_register_move_cost (machine_mode mode
, reg_class_t class1_i
,
18596 reg_class_t class2_i
)
18598 enum reg_class class1
= (enum reg_class
) class1_i
;
18599 enum reg_class class2
= (enum reg_class
) class2_i
;
18601 /* In case we require secondary memory, compute cost of the store followed
18602 by load. In order to avoid bad register allocation choices, we need
18603 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18605 if (inline_secondary_memory_needed (mode
, class1
, class2
, false))
18609 cost
+= inline_memory_move_cost (mode
, class1
, 2);
18610 cost
+= inline_memory_move_cost (mode
, class2
, 2);
18612 /* In case of copying from general_purpose_register we may emit multiple
18613 stores followed by single load causing memory size mismatch stall.
18614 Count this as arbitrarily high cost of 20. */
18615 if (GET_MODE_BITSIZE (mode
) > BITS_PER_WORD
18616 && TARGET_MEMORY_MISMATCH_STALL
18617 && targetm
.class_max_nregs (class1
, mode
)
18618 > targetm
.class_max_nregs (class2
, mode
))
18621 /* In the case of FP/MMX moves, the registers actually overlap, and we
18622 have to switch modes in order to treat them differently. */
18623 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
18624 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
18630 /* Moves between SSE/MMX and integer unit are expensive. */
18631 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
18632 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18634 /* ??? By keeping returned value relatively high, we limit the number
18635 of moves between integer and MMX/SSE registers for all targets.
18636 Additionally, high value prevents problem with x86_modes_tieable_p(),
18637 where integer modes in MMX/SSE registers are not tieable
18638 because of missing QImode and HImode moves to, from or between
18639 MMX/SSE registers. */
18640 return MAX (8, MMX_CLASS_P (class1
) || MMX_CLASS_P (class2
)
18641 ? ix86_cost
->mmxsse_to_integer
: ix86_cost
->ssemmx_to_integer
);
18643 if (MAYBE_FLOAT_CLASS_P (class1
))
18644 return ix86_cost
->fp_move
;
18645 if (MAYBE_SSE_CLASS_P (class1
))
18647 if (GET_MODE_BITSIZE (mode
) <= 128)
18648 return ix86_cost
->xmm_move
;
18649 if (GET_MODE_BITSIZE (mode
) <= 256)
18650 return ix86_cost
->ymm_move
;
18651 return ix86_cost
->zmm_move
;
18653 if (MAYBE_MMX_CLASS_P (class1
))
18654 return ix86_cost
->mmx_move
;
18658 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
18659 words of a value of mode MODE but can be less for certain modes in
18660 special long registers.
18662 Actually there are no two word move instructions for consecutive
18663 registers. And only registers 0-3 may have mov byte instructions
18664 applied to them. */
18666 static unsigned int
18667 ix86_hard_regno_nregs (unsigned int regno
, machine_mode mode
)
18669 if (GENERAL_REGNO_P (regno
))
18671 if (mode
== XFmode
)
18672 return TARGET_64BIT
? 2 : 3;
18673 if (mode
== XCmode
)
18674 return TARGET_64BIT
? 4 : 6;
18675 return CEIL (GET_MODE_SIZE (mode
), UNITS_PER_WORD
);
18677 if (COMPLEX_MODE_P (mode
))
18679 if (mode
== V64SFmode
|| mode
== V64SImode
)
18684 /* Implement TARGET_HARD_REGNO_MODE_OK. */
18687 ix86_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
18689 /* Flags and only flags can only hold CCmode values. */
18690 if (CC_REGNO_P (regno
))
18691 return GET_MODE_CLASS (mode
) == MODE_CC
;
18692 if (GET_MODE_CLASS (mode
) == MODE_CC
18693 || GET_MODE_CLASS (mode
) == MODE_RANDOM
18694 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
18696 if (STACK_REGNO_P (regno
))
18697 return VALID_FP_MODE_P (mode
);
18698 if (MASK_REGNO_P (regno
))
18699 return (VALID_MASK_REG_MODE (mode
)
18700 || (TARGET_AVX512BW
18701 && VALID_MASK_AVX512BW_MODE (mode
)));
18702 if (SSE_REGNO_P (regno
))
18704 /* We implement the move patterns for all vector modes into and
18705 out of SSE registers, even when no operation instructions
18708 /* For AVX-512 we allow, regardless of regno:
18710 - any of 512-bit wide vector mode
18711 - any scalar mode. */
18714 || VALID_AVX512F_REG_MODE (mode
)
18715 || VALID_AVX512F_SCALAR_MODE (mode
)))
18718 /* For AVX-5124FMAPS or AVX-5124VNNIW
18719 allow V64SF and V64SI modes for special regnos. */
18720 if ((TARGET_AVX5124FMAPS
|| TARGET_AVX5124VNNIW
)
18721 && (mode
== V64SFmode
|| mode
== V64SImode
)
18722 && MOD4_SSE_REGNO_P (regno
))
18725 /* TODO check for QI/HI scalars. */
18726 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
18727 if (TARGET_AVX512VL
18730 || VALID_AVX256_REG_MODE (mode
)
18731 || VALID_AVX512VL_128_REG_MODE (mode
)))
18734 /* xmm16-xmm31 are only available for AVX-512. */
18735 if (EXT_REX_SSE_REGNO_P (regno
))
18738 /* OImode and AVX modes are available only when AVX is enabled. */
18739 return ((TARGET_AVX
18740 && VALID_AVX256_REG_OR_OI_MODE (mode
))
18741 || VALID_SSE_REG_MODE (mode
)
18742 || VALID_SSE2_REG_MODE (mode
)
18743 || VALID_MMX_REG_MODE (mode
)
18744 || VALID_MMX_REG_MODE_3DNOW (mode
));
18746 if (MMX_REGNO_P (regno
))
18748 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18749 so if the register is available at all, then we can move data of
18750 the given mode into or out of it. */
18751 return (VALID_MMX_REG_MODE (mode
)
18752 || VALID_MMX_REG_MODE_3DNOW (mode
));
18755 if (mode
== QImode
)
18757 /* Take care for QImode values - they can be in non-QI regs,
18758 but then they do cause partial register stalls. */
18759 if (ANY_QI_REGNO_P (regno
))
18761 if (!TARGET_PARTIAL_REG_STALL
)
18763 /* LRA checks if the hard register is OK for the given mode.
18764 QImode values can live in non-QI regs, so we allow all
18766 if (lra_in_progress
)
18768 return !can_create_pseudo_p ();
18770 /* We handle both integer and floats in the general purpose registers. */
18771 else if (VALID_INT_MODE_P (mode
))
18773 else if (VALID_FP_MODE_P (mode
))
18775 else if (VALID_DFP_MODE_P (mode
))
18777 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18778 on to use that value in smaller contexts, this can easily force a
18779 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18780 supporting DImode, allow it. */
18781 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
18787 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
18788 saves SSE registers across calls is Win64 (thus no need to check the
18789 current ABI here), and with AVX enabled Win64 only guarantees that
18790 the low 16 bytes are saved. */
18793 ix86_hard_regno_call_part_clobbered (rtx_insn
*insn ATTRIBUTE_UNUSED
,
18794 unsigned int regno
, machine_mode mode
)
18796 return SSE_REGNO_P (regno
) && GET_MODE_SIZE (mode
) > 16;
18799 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18800 tieable integer mode. */
18803 ix86_tieable_integer_mode_p (machine_mode mode
)
18812 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
18815 return TARGET_64BIT
;
18822 /* Implement TARGET_MODES_TIEABLE_P.
18824 Return true if MODE1 is accessible in a register that can hold MODE2
18825 without copying. That is, all register classes that can hold MODE2
18826 can also hold MODE1. */
18829 ix86_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
18831 if (mode1
== mode2
)
18834 if (ix86_tieable_integer_mode_p (mode1
)
18835 && ix86_tieable_integer_mode_p (mode2
))
18838 /* MODE2 being XFmode implies fp stack or general regs, which means we
18839 can tie any smaller floating point modes to it. Note that we do not
18840 tie this with TFmode. */
18841 if (mode2
== XFmode
)
18842 return mode1
== SFmode
|| mode1
== DFmode
;
18844 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18845 that we can tie it with SFmode. */
18846 if (mode2
== DFmode
)
18847 return mode1
== SFmode
;
18849 /* If MODE2 is only appropriate for an SSE register, then tie with
18850 any other mode acceptable to SSE registers. */
18851 if (GET_MODE_SIZE (mode2
) == 64
18852 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
18853 return (GET_MODE_SIZE (mode1
) == 64
18854 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
18855 if (GET_MODE_SIZE (mode2
) == 32
18856 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
18857 return (GET_MODE_SIZE (mode1
) == 32
18858 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
18859 if (GET_MODE_SIZE (mode2
) == 16
18860 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
18861 return (GET_MODE_SIZE (mode1
) == 16
18862 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
18864 /* If MODE2 is appropriate for an MMX register, then tie
18865 with any other mode acceptable to MMX registers. */
18866 if (GET_MODE_SIZE (mode2
) == 8
18867 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
18868 return (GET_MODE_SIZE (mode1
) == 8
18869 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
18874 /* Return the cost of moving between two registers of mode MODE. */
18877 ix86_set_reg_reg_cost (machine_mode mode
)
18879 unsigned int units
= UNITS_PER_WORD
;
18881 switch (GET_MODE_CLASS (mode
))
18887 units
= GET_MODE_SIZE (CCmode
);
18891 if ((TARGET_SSE
&& mode
== TFmode
)
18892 || (TARGET_80387
&& mode
== XFmode
)
18893 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DFmode
)
18894 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SFmode
))
18895 units
= GET_MODE_SIZE (mode
);
18898 case MODE_COMPLEX_FLOAT
:
18899 if ((TARGET_SSE
&& mode
== TCmode
)
18900 || (TARGET_80387
&& mode
== XCmode
)
18901 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DCmode
)
18902 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SCmode
))
18903 units
= GET_MODE_SIZE (mode
);
18906 case MODE_VECTOR_INT
:
18907 case MODE_VECTOR_FLOAT
:
18908 if ((TARGET_AVX512F
&& VALID_AVX512F_REG_MODE (mode
))
18909 || (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
18910 || (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
18911 || (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
18912 || ((TARGET_MMX
|| TARGET_MMX_WITH_SSE
)
18913 && VALID_MMX_REG_MODE (mode
)))
18914 units
= GET_MODE_SIZE (mode
);
18917 /* Return the cost of moving between two registers of mode MODE,
18918 assuming that the move will be in pieces of at most UNITS bytes. */
18919 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode
), units
));
18922 /* Return cost of vector operation in MODE given that scalar version has
18926 ix86_vec_cost (machine_mode mode
, int cost
)
18928 if (!VECTOR_MODE_P (mode
))
18931 if (GET_MODE_BITSIZE (mode
) == 128
18932 && TARGET_SSE_SPLIT_REGS
)
18934 if (GET_MODE_BITSIZE (mode
) > 128
18935 && TARGET_AVX128_OPTIMAL
)
18936 return cost
* GET_MODE_BITSIZE (mode
) / 128;
18940 /* Return cost of multiplication in MODE. */
18943 ix86_multiplication_cost (const struct processor_costs
*cost
,
18944 enum machine_mode mode
)
18946 machine_mode inner_mode
= mode
;
18947 if (VECTOR_MODE_P (mode
))
18948 inner_mode
= GET_MODE_INNER (mode
);
18950 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
18951 return inner_mode
== DFmode
? cost
->mulsd
: cost
->mulss
;
18952 else if (X87_FLOAT_MODE_P (mode
))
18954 else if (FLOAT_MODE_P (mode
))
18955 return ix86_vec_cost (mode
,
18956 inner_mode
== DFmode
? cost
->mulsd
: cost
->mulss
);
18957 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
18959 /* vpmullq is used in this case. No emulation is needed. */
18960 if (TARGET_AVX512DQ
)
18961 return ix86_vec_cost (mode
, cost
->mulss
);
18963 /* V*QImode is emulated with 7-13 insns. */
18964 if (mode
== V16QImode
|| mode
== V32QImode
)
18967 if (TARGET_XOP
&& mode
== V16QImode
)
18969 else if (TARGET_SSSE3
)
18971 return ix86_vec_cost (mode
, cost
->mulss
* 2 + cost
->sse_op
* extra
);
18973 /* V*DImode is emulated with 5-8 insns. */
18974 else if (mode
== V2DImode
|| mode
== V4DImode
)
18976 if (TARGET_XOP
&& mode
== V2DImode
)
18977 return ix86_vec_cost (mode
, cost
->mulss
* 2 + cost
->sse_op
* 3);
18979 return ix86_vec_cost (mode
, cost
->mulss
* 3 + cost
->sse_op
* 5);
18981 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
18982 insns, including two PMULUDQ. */
18983 else if (mode
== V4SImode
&& !(TARGET_SSE4_1
|| TARGET_AVX
))
18984 return ix86_vec_cost (mode
, cost
->mulss
* 2 + cost
->sse_op
* 5);
18986 return ix86_vec_cost (mode
, cost
->mulss
);
18989 return (cost
->mult_init
[MODE_INDEX (mode
)] + cost
->mult_bit
* 7);
18992 /* Return cost of multiplication in MODE. */
18995 ix86_division_cost (const struct processor_costs
*cost
,
18996 enum machine_mode mode
)
18998 machine_mode inner_mode
= mode
;
18999 if (VECTOR_MODE_P (mode
))
19000 inner_mode
= GET_MODE_INNER (mode
);
19002 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19003 return inner_mode
== DFmode
? cost
->divsd
: cost
->divss
;
19004 else if (X87_FLOAT_MODE_P (mode
))
19006 else if (FLOAT_MODE_P (mode
))
19007 return ix86_vec_cost (mode
,
19008 inner_mode
== DFmode
? cost
->divsd
: cost
->divss
);
19010 return cost
->divide
[MODE_INDEX (mode
)];
19013 #define COSTS_N_BYTES(N) ((N) * 2)
19015 /* Return cost of shift in MODE.
19016 If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
19017 AND_IN_OP1 specify in op1 is result of and and SHIFT_AND_TRUNCATE
19018 if op1 is a result of subreg.
19020 SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */
19023 ix86_shift_rotate_cost (const struct processor_costs
*cost
,
19024 enum machine_mode mode
, bool constant_op1
,
19025 HOST_WIDE_INT op1_val
,
19028 bool shift_and_truncate
,
19029 bool *skip_op0
, bool *skip_op1
)
19032 *skip_op0
= *skip_op1
= false;
19033 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
19035 /* V*QImode is emulated with 1-11 insns. */
19036 if (mode
== V16QImode
|| mode
== V32QImode
)
19039 if (TARGET_XOP
&& mode
== V16QImode
)
19041 /* For XOP we use vpshab, which requires a broadcast of the
19042 value to the variable shift insn. For constants this
19043 means a V16Q const in mem; even when we can perform the
19044 shift with one insn set the cost to prefer paddb. */
19049 return ix86_vec_cost (mode
,
19054 (GET_MODE_UNIT_SIZE (mode
))));
19058 else if (TARGET_SSSE3
)
19060 return ix86_vec_cost (mode
, cost
->sse_op
* count
);
19063 return ix86_vec_cost (mode
, cost
->sse_op
);
19065 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
19070 return cost
->shift_const
+ COSTS_N_INSNS (2);
19072 return cost
->shift_const
* 2;
19077 return cost
->shift_var
* 2;
19079 return cost
->shift_var
* 6 + COSTS_N_INSNS (2);
19085 return cost
->shift_const
;
19086 else if (shift_and_truncate
)
19089 *skip_op0
= *skip_op1
= true;
19090 /* Return the cost after shift-and truncation. */
19091 return cost
->shift_var
;
19094 return cost
->shift_var
;
19096 return cost
->shift_const
;
19099 /* Compute a (partial) cost for rtx X. Return true if the complete
19100 cost has been computed, and false if subexpressions should be
19101 scanned. In either case, *TOTAL contains the cost result. */
19104 ix86_rtx_costs (rtx x
, machine_mode mode
, int outer_code_i
, int opno
,
19105 int *total
, bool speed
)
19108 enum rtx_code code
= GET_CODE (x
);
19109 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
19110 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
19116 if (register_operand (SET_DEST (x
), VOIDmode
)
19117 && register_operand (SET_SRC (x
), VOIDmode
))
19119 *total
= ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x
)));
19123 if (register_operand (SET_SRC (x
), VOIDmode
))
19124 /* Avoid potentially incorrect high cost from rtx_costs
19125 for non-tieable SUBREGs. */
19129 src_cost
= rtx_cost (SET_SRC (x
), mode
, SET
, 1, speed
);
19131 if (CONSTANT_P (SET_SRC (x
)))
19132 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
19133 a small value, possibly zero for cheap constants. */
19134 src_cost
+= COSTS_N_INSNS (1);
19137 *total
= src_cost
+ rtx_cost (SET_DEST (x
), mode
, SET
, 0, speed
);
19144 if (x86_64_immediate_operand (x
, VOIDmode
))
19151 if (IS_STACK_MODE (mode
))
19152 switch (standard_80387_constant_p (x
))
19160 default: /* Other constants */
19167 switch (standard_sse_constant_p (x
, mode
))
19171 case 1: /* 0: xor eliminates false dependency */
19174 default: /* -1: cmp contains false dependency */
19180 case CONST_WIDE_INT
:
19181 /* Fall back to (MEM (SYMBOL_REF)), since that's where
19182 it'll probably end up. Add a penalty for size. */
19183 *total
= (COSTS_N_INSNS (1)
19184 + (!TARGET_64BIT
&& flag_pic
)
19185 + (GET_MODE_SIZE (mode
) <= 4
19186 ? 0 : GET_MODE_SIZE (mode
) <= 8 ? 1 : 2));
19190 /* The zero extensions is often completely free on x86_64, so make
19191 it as cheap as possible. */
19192 if (TARGET_64BIT
&& mode
== DImode
19193 && GET_MODE (XEXP (x
, 0)) == SImode
)
19195 else if (TARGET_ZERO_EXTEND_WITH_AND
)
19196 *total
= cost
->add
;
19198 *total
= cost
->movzx
;
19202 *total
= cost
->movsx
;
19206 if (SCALAR_INT_MODE_P (mode
)
19207 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
19208 && CONST_INT_P (XEXP (x
, 1)))
19210 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19213 *total
= cost
->add
;
19216 if ((value
== 2 || value
== 3)
19217 && cost
->lea
<= cost
->shift_const
)
19219 *total
= cost
->lea
;
19229 bool skip_op0
, skip_op1
;
19230 *total
= ix86_shift_rotate_cost (cost
, mode
, CONSTANT_P (XEXP (x
, 1)),
19231 CONST_INT_P (XEXP (x
, 1))
19232 ? INTVAL (XEXP (x
, 1)) : -1,
19234 GET_CODE (XEXP (x
, 1)) == AND
,
19235 SUBREG_P (XEXP (x
, 1))
19236 && GET_CODE (XEXP (XEXP (x
, 1), 0)) == AND
,
19237 &skip_op0
, &skip_op1
);
19238 if (skip_op0
|| skip_op1
)
19241 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
19243 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed
);
19252 gcc_assert (FLOAT_MODE_P (mode
));
19253 gcc_assert (TARGET_FMA
|| TARGET_FMA4
|| TARGET_AVX512F
);
19255 *total
= ix86_vec_cost (mode
,
19256 GET_MODE_INNER (mode
) == SFmode
19257 ? cost
->fmass
: cost
->fmasd
);
19258 *total
+= rtx_cost (XEXP (x
, 1), mode
, FMA
, 1, speed
);
19260 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
19262 if (GET_CODE (sub
) == NEG
)
19263 sub
= XEXP (sub
, 0);
19264 *total
+= rtx_cost (sub
, mode
, FMA
, 0, speed
);
19267 if (GET_CODE (sub
) == NEG
)
19268 sub
= XEXP (sub
, 0);
19269 *total
+= rtx_cost (sub
, mode
, FMA
, 2, speed
);
19274 if (!FLOAT_MODE_P (mode
) && !VECTOR_MODE_P (mode
))
19276 rtx op0
= XEXP (x
, 0);
19277 rtx op1
= XEXP (x
, 1);
19279 if (CONST_INT_P (XEXP (x
, 1)))
19281 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19282 for (nbits
= 0; value
!= 0; value
&= value
- 1)
19286 /* This is arbitrary. */
19289 /* Compute costs correctly for widening multiplication. */
19290 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
19291 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
19292 == GET_MODE_SIZE (mode
))
19294 int is_mulwiden
= 0;
19295 machine_mode inner_mode
= GET_MODE (op0
);
19297 if (GET_CODE (op0
) == GET_CODE (op1
))
19298 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
19299 else if (CONST_INT_P (op1
))
19301 if (GET_CODE (op0
) == SIGN_EXTEND
)
19302 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
19305 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
19309 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
19312 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
19313 + nbits
* cost
->mult_bit
19314 + rtx_cost (op0
, mode
, outer_code
, opno
, speed
)
19315 + rtx_cost (op1
, mode
, outer_code
, opno
, speed
));
19319 *total
= ix86_multiplication_cost (cost
, mode
);
19326 *total
= ix86_division_cost (cost
, mode
);
19330 if (GET_MODE_CLASS (mode
) == MODE_INT
19331 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
)
19333 if (GET_CODE (XEXP (x
, 0)) == PLUS
19334 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
19335 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
19336 && CONSTANT_P (XEXP (x
, 1)))
19338 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
19339 if (val
== 2 || val
== 4 || val
== 8)
19341 *total
= cost
->lea
;
19342 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), mode
,
19343 outer_code
, opno
, speed
);
19344 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
19345 outer_code
, opno
, speed
);
19346 *total
+= rtx_cost (XEXP (x
, 1), mode
,
19347 outer_code
, opno
, speed
);
19351 else if (GET_CODE (XEXP (x
, 0)) == MULT
19352 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
19354 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
19355 if (val
== 2 || val
== 4 || val
== 8)
19357 *total
= cost
->lea
;
19358 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
19359 outer_code
, opno
, speed
);
19360 *total
+= rtx_cost (XEXP (x
, 1), mode
,
19361 outer_code
, opno
, speed
);
19365 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
19367 /* Add with carry, ignore the cost of adding a carry flag. */
19368 if (ix86_carry_flag_operator (XEXP (XEXP (x
, 0), 0), mode
))
19369 *total
= cost
->add
;
19372 *total
= cost
->lea
;
19373 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
19374 outer_code
, opno
, speed
);
19377 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), mode
,
19378 outer_code
, opno
, speed
);
19379 *total
+= rtx_cost (XEXP (x
, 1), mode
,
19380 outer_code
, opno
, speed
);
19387 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
19388 if (GET_MODE_CLASS (mode
) == MODE_INT
19389 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
19390 && GET_CODE (XEXP (x
, 0)) == MINUS
19391 && ix86_carry_flag_operator (XEXP (XEXP (x
, 0), 1), mode
))
19393 *total
= cost
->add
;
19394 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
19395 outer_code
, opno
, speed
);
19396 *total
+= rtx_cost (XEXP (x
, 1), mode
,
19397 outer_code
, opno
, speed
);
19401 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19403 *total
= cost
->addss
;
19406 else if (X87_FLOAT_MODE_P (mode
))
19408 *total
= cost
->fadd
;
19411 else if (FLOAT_MODE_P (mode
))
19413 *total
= ix86_vec_cost (mode
, cost
->addss
);
19421 if (GET_MODE_CLASS (mode
) == MODE_INT
19422 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
19424 *total
= (cost
->add
* 2
19425 + (rtx_cost (XEXP (x
, 0), mode
, outer_code
, opno
, speed
)
19426 << (GET_MODE (XEXP (x
, 0)) != DImode
))
19427 + (rtx_cost (XEXP (x
, 1), mode
, outer_code
, opno
, speed
)
19428 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
19434 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19436 *total
= cost
->sse_op
;
19439 else if (X87_FLOAT_MODE_P (mode
))
19441 *total
= cost
->fchs
;
19444 else if (FLOAT_MODE_P (mode
))
19446 *total
= ix86_vec_cost (mode
, cost
->sse_op
);
19452 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
19453 *total
= ix86_vec_cost (mode
, cost
->sse_op
);
19454 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
19455 *total
= cost
->add
* 2;
19457 *total
= cost
->add
;
19461 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
19462 && XEXP (XEXP (x
, 0), 1) == const1_rtx
19463 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
19464 && XEXP (x
, 1) == const0_rtx
)
19466 /* This kind of construct is implemented using test[bwl].
19467 Treat it as if we had an AND. */
19468 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
19469 *total
= (cost
->add
19470 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, outer_code
,
19472 + rtx_cost (const1_rtx
, mode
, outer_code
, opno
, speed
));
19476 /* The embedded comparison operand is completely free. */
19477 if (!general_operand (XEXP (x
, 0), GET_MODE (XEXP (x
, 0)))
19478 && XEXP (x
, 1) == const0_rtx
)
19484 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
19487 *total
= ix86_vec_cost (mode
, cost
->addss
);
19490 case FLOAT_TRUNCATE
:
19491 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
19492 *total
= cost
->fadd
;
19494 *total
= ix86_vec_cost (mode
, cost
->addss
);
19498 /* SSE requires memory load for the constant operand. It may make
19499 sense to account for this. Of course the constant operand may or
19500 may not be reused. */
19501 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19502 *total
= cost
->sse_op
;
19503 else if (X87_FLOAT_MODE_P (mode
))
19504 *total
= cost
->fabs
;
19505 else if (FLOAT_MODE_P (mode
))
19506 *total
= ix86_vec_cost (mode
, cost
->sse_op
);
19510 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19511 *total
= mode
== SFmode
? cost
->sqrtss
: cost
->sqrtsd
;
19512 else if (X87_FLOAT_MODE_P (mode
))
19513 *total
= cost
->fsqrt
;
19514 else if (FLOAT_MODE_P (mode
))
19515 *total
= ix86_vec_cost (mode
,
19516 mode
== SFmode
? cost
->sqrtss
: cost
->sqrtsd
);
19520 if (XINT (x
, 1) == UNSPEC_TP
)
19526 case VEC_DUPLICATE
:
19527 /* ??? Assume all of these vector manipulation patterns are
19528 recognizable. In which case they all pretty much have the
19530 *total
= cost
->sse_op
;
19533 mask
= XEXP (x
, 2);
19534 /* This is masked instruction, assume the same cost,
19535 as nonmasked variant. */
19536 if (TARGET_AVX512F
&& register_operand (mask
, GET_MODE (mask
)))
19537 *total
= rtx_cost (XEXP (x
, 0), mode
, outer_code
, opno
, speed
);
19539 *total
= cost
->sse_op
;
19549 static int current_machopic_label_num
;
19551 /* Given a symbol name and its associated stub, write out the
19552 definition of the stub. */
19555 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
19557 unsigned int length
;
19558 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
19559 int label
= ++current_machopic_label_num
;
19561 /* For 64-bit we shouldn't get here. */
19562 gcc_assert (!TARGET_64BIT
);
19564 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19565 symb
= targetm
.strip_name_encoding (symb
);
19567 length
= strlen (stub
);
19568 binder_name
= XALLOCAVEC (char, length
+ 32);
19569 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
19571 length
= strlen (symb
);
19572 symbol_name
= XALLOCAVEC (char, length
+ 32);
19573 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
19575 sprintf (lazy_ptr_name
, "L%d$lz", label
);
19577 if (MACHOPIC_ATT_STUB
)
19578 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
19579 else if (MACHOPIC_PURE
)
19580 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
19582 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
19584 fprintf (file
, "%s:\n", stub
);
19585 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19587 if (MACHOPIC_ATT_STUB
)
19589 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
19591 else if (MACHOPIC_PURE
)
19594 /* 25-byte PIC stub using "CALL get_pc_thunk". */
19595 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
19596 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
19597 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
19598 label
, lazy_ptr_name
, label
);
19599 fprintf (file
, "\tjmp\t*%%ecx\n");
19602 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
19604 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
19605 it needs no stub-binding-helper. */
19606 if (MACHOPIC_ATT_STUB
)
19609 fprintf (file
, "%s:\n", binder_name
);
19613 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
19614 fprintf (file
, "\tpushl\t%%ecx\n");
19617 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
19619 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
19621 /* N.B. Keep the correspondence of these
19622 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
19623 old-pic/new-pic/non-pic stubs; altering this will break
19624 compatibility with existing dylibs. */
19627 /* 25-byte PIC stub using "CALL get_pc_thunk". */
19628 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
19631 /* 16-byte -mdynamic-no-pic stub. */
19632 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
19634 fprintf (file
, "%s:\n", lazy_ptr_name
);
19635 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19636 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
19638 #endif /* TARGET_MACHO */
19640 /* Order the registers for register allocator. */
19643 x86_order_regs_for_local_alloc (void)
19648 /* First allocate the local general purpose registers. */
19649 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19650 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
19651 reg_alloc_order
[pos
++] = i
;
19653 /* Global general purpose registers. */
19654 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19655 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
19656 reg_alloc_order
[pos
++] = i
;
19658 /* x87 registers come first in case we are doing FP math
19660 if (!TARGET_SSE_MATH
)
19661 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19662 reg_alloc_order
[pos
++] = i
;
19664 /* SSE registers. */
19665 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
19666 reg_alloc_order
[pos
++] = i
;
19667 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
19668 reg_alloc_order
[pos
++] = i
;
19670 /* Extended REX SSE registers. */
19671 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
19672 reg_alloc_order
[pos
++] = i
;
19674 /* Mask register. */
19675 for (i
= FIRST_MASK_REG
; i
<= LAST_MASK_REG
; i
++)
19676 reg_alloc_order
[pos
++] = i
;
19678 /* x87 registers. */
19679 if (TARGET_SSE_MATH
)
19680 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19681 reg_alloc_order
[pos
++] = i
;
19683 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
19684 reg_alloc_order
[pos
++] = i
;
19686 /* Initialize the rest of array as we do not allocate some registers
19688 while (pos
< FIRST_PSEUDO_REGISTER
)
19689 reg_alloc_order
[pos
++] = 0;
19693 ix86_ms_bitfield_layout_p (const_tree record_type
)
19695 return ((TARGET_MS_BITFIELD_LAYOUT
19696 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
19697 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
19700 /* Returns an expression indicating where the this parameter is
19701 located on entry to the FUNCTION. */
19704 x86_this_parameter (tree function
)
19706 tree type
= TREE_TYPE (function
);
19707 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
19712 const int *parm_regs
;
19714 if (ix86_function_type_abi (type
) == MS_ABI
)
19715 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
19717 parm_regs
= x86_64_int_parameter_registers
;
19718 return gen_rtx_REG (Pmode
, parm_regs
[aggr
]);
19721 nregs
= ix86_function_regparm (type
, function
);
19723 if (nregs
> 0 && !stdarg_p (type
))
19726 unsigned int ccvt
= ix86_get_callcvt (type
);
19728 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
19729 regno
= aggr
? DX_REG
: CX_REG
;
19730 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
19734 return gen_rtx_MEM (SImode
,
19735 plus_constant (Pmode
, stack_pointer_rtx
, 4));
19744 return gen_rtx_MEM (SImode
,
19745 plus_constant (Pmode
,
19746 stack_pointer_rtx
, 4));
19749 return gen_rtx_REG (SImode
, regno
);
19752 return gen_rtx_MEM (SImode
, plus_constant (Pmode
, stack_pointer_rtx
,
19756 /* Determine whether x86_output_mi_thunk can succeed. */
19759 x86_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
19760 const_tree function
)
19762 /* 64-bit can handle anything. */
19766 /* For 32-bit, everything's fine if we have one free register. */
19767 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
19770 /* Need a free register for vcall_offset. */
19774 /* Need a free register for GOT references. */
19775 if (flag_pic
&& !targetm
.binds_local_p (function
))
19778 /* Otherwise ok. */
19782 /* Output the assembler code for a thunk function. THUNK_DECL is the
19783 declaration for the thunk function itself, FUNCTION is the decl for
19784 the target function. DELTA is an immediate constant offset to be
19785 added to THIS. If VCALL_OFFSET is nonzero, the word at
19786 *(*this + vcall_offset) should be added to THIS. */
19789 x86_output_mi_thunk (FILE *file
, tree thunk_fndecl
, HOST_WIDE_INT delta
,
19790 HOST_WIDE_INT vcall_offset
, tree function
)
19792 const char *fnname
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl
));
19793 rtx this_param
= x86_this_parameter (function
);
19794 rtx this_reg
, tmp
, fnaddr
;
19795 unsigned int tmp_regno
;
19799 tmp_regno
= R10_REG
;
19802 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
19803 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
19804 tmp_regno
= AX_REG
;
19805 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
19806 tmp_regno
= DX_REG
;
19808 tmp_regno
= CX_REG
;
19811 emit_note (NOTE_INSN_PROLOGUE_END
);
19813 /* CET is enabled, insert EB instruction. */
19814 if ((flag_cf_protection
& CF_BRANCH
))
19815 emit_insn (gen_nop_endbr ());
19817 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19818 pull it in now and let DELTA benefit. */
19819 if (REG_P (this_param
))
19820 this_reg
= this_param
;
19821 else if (vcall_offset
)
19823 /* Put the this parameter into %eax. */
19824 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
19825 emit_move_insn (this_reg
, this_param
);
19828 this_reg
= NULL_RTX
;
19830 /* Adjust the this parameter by a fixed constant. */
19833 rtx delta_rtx
= GEN_INT (delta
);
19834 rtx delta_dst
= this_reg
? this_reg
: this_param
;
19838 if (!x86_64_general_operand (delta_rtx
, Pmode
))
19840 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
19841 emit_move_insn (tmp
, delta_rtx
);
19846 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
19849 /* Adjust the this parameter by a value stored in the vtable. */
19852 rtx vcall_addr
, vcall_mem
, this_mem
;
19854 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
19856 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
19857 if (Pmode
!= ptr_mode
)
19858 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
19859 emit_move_insn (tmp
, this_mem
);
19861 /* Adjust the this parameter. */
19862 vcall_addr
= plus_constant (Pmode
, tmp
, vcall_offset
);
19864 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
19866 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
19867 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
19868 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
19871 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
19872 if (Pmode
!= ptr_mode
)
19873 emit_insn (gen_addsi_1_zext (this_reg
,
19874 gen_rtx_REG (ptr_mode
,
19878 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
19881 /* If necessary, drop THIS back to its stack slot. */
19882 if (this_reg
&& this_reg
!= this_param
)
19883 emit_move_insn (this_param
, this_reg
);
19885 fnaddr
= XEXP (DECL_RTL (function
), 0);
19888 if (!flag_pic
|| targetm
.binds_local_p (function
)
19893 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
19894 tmp
= gen_rtx_CONST (Pmode
, tmp
);
19895 fnaddr
= gen_const_mem (Pmode
, tmp
);
19900 if (!flag_pic
|| targetm
.binds_local_p (function
))
19903 else if (TARGET_MACHO
)
19905 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
19906 fnaddr
= XEXP (fnaddr
, 0);
19908 #endif /* TARGET_MACHO */
19911 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
19912 output_set_got (tmp
, NULL_RTX
);
19914 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
19915 fnaddr
= gen_rtx_CONST (Pmode
, fnaddr
);
19916 fnaddr
= gen_rtx_PLUS (Pmode
, tmp
, fnaddr
);
19917 fnaddr
= gen_const_mem (Pmode
, fnaddr
);
19921 /* Our sibling call patterns do not allow memories, because we have no
19922 predicate that can distinguish between frame and non-frame memory.
19923 For our purposes here, we can get away with (ab)using a jump pattern,
19924 because we're going to do no optimization. */
19925 if (MEM_P (fnaddr
))
19927 if (sibcall_insn_operand (fnaddr
, word_mode
))
19929 fnaddr
= XEXP (DECL_RTL (function
), 0);
19930 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
19931 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
19932 tmp
= emit_call_insn (tmp
);
19933 SIBLING_CALL_P (tmp
) = 1;
19936 emit_jump_insn (gen_indirect_jump (fnaddr
));
19940 if (ix86_cmodel
== CM_LARGE_PIC
&& SYMBOLIC_CONST (fnaddr
))
19942 // CM_LARGE_PIC always uses pseudo PIC register which is
19943 // uninitialized. Since FUNCTION is local and calling it
19944 // doesn't go through PLT, we use scratch register %r11 as
19945 // PIC register and initialize it here.
19946 pic_offset_table_rtx
= gen_rtx_REG (Pmode
, R11_REG
);
19947 ix86_init_large_pic_reg (tmp_regno
);
19948 fnaddr
= legitimize_pic_address (fnaddr
,
19949 gen_rtx_REG (Pmode
, tmp_regno
));
19952 if (!sibcall_insn_operand (fnaddr
, word_mode
))
19954 tmp
= gen_rtx_REG (word_mode
, tmp_regno
);
19955 if (GET_MODE (fnaddr
) != word_mode
)
19956 fnaddr
= gen_rtx_ZERO_EXTEND (word_mode
, fnaddr
);
19957 emit_move_insn (tmp
, fnaddr
);
19961 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
19962 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
19963 tmp
= emit_call_insn (tmp
);
19964 SIBLING_CALL_P (tmp
) = 1;
19968 /* Emit just enough of rest_of_compilation to get the insns emitted.
19969 Note that use_thunk calls assemble_start_function et al. */
19970 insn
= get_insns ();
19971 shorten_branches (insn
);
19972 assemble_start_function (thunk_fndecl
, fnname
);
19973 final_start_function (insn
, file
, 1);
19974 final (insn
, file
, 1);
19975 final_end_function ();
19976 assemble_end_function (thunk_fndecl
, fnname
);
19980 x86_file_start (void)
19982 default_file_start ();
19984 fputs ("\t.code16gcc\n", asm_out_file
);
19986 darwin_file_start ();
19988 if (X86_FILE_START_VERSION_DIRECTIVE
)
19989 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
19990 if (X86_FILE_START_FLTUSED
)
19991 fputs ("\t.global\t__fltused\n", asm_out_file
);
19992 if (ix86_asm_dialect
== ASM_INTEL
)
19993 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
19997 x86_field_alignment (tree type
, int computed
)
20001 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
20004 return iamcu_alignment (type
, computed
);
20005 mode
= TYPE_MODE (strip_array_types (type
));
20006 if (mode
== DFmode
|| mode
== DCmode
20007 || GET_MODE_CLASS (mode
) == MODE_INT
20008 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
20009 return MIN (32, computed
);
20013 /* Print call to TARGET to FILE. */
20016 x86_print_call_or_nop (FILE *file
, const char *target
)
20018 if (flag_nop_mcount
|| !strcmp (target
, "nop"))
20019 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
20020 fprintf (file
, "1:" ASM_BYTE
"0x0f, 0x1f, 0x44, 0x00, 0x00\n");
20022 fprintf (file
, "1:\tcall\t%s\n", target
);
20026 current_fentry_name (const char **name
)
20028 tree attr
= lookup_attribute ("fentry_name",
20029 DECL_ATTRIBUTES (current_function_decl
));
20032 *name
= TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr
)));
20037 current_fentry_section (const char **name
)
20039 tree attr
= lookup_attribute ("fentry_section",
20040 DECL_ATTRIBUTES (current_function_decl
));
20043 *name
= TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr
)));
20047 /* Output assembler code to FILE to increment profiler label # LABELNO
20048 for profiling a function entry. */
20050 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
20052 if (cfun
->machine
->endbr_queued_at_entrance
)
20053 fprintf (file
, "\t%s\n", TARGET_64BIT
? "endbr64" : "endbr32");
20055 const char *mcount_name
= MCOUNT_NAME
;
20057 if (current_fentry_name (&mcount_name
))
20059 else if (fentry_name
)
20060 mcount_name
= fentry_name
;
20061 else if (flag_fentry
)
20062 mcount_name
= MCOUNT_NAME_BEFORE_PROLOGUE
;
20066 #ifndef NO_PROFILE_COUNTERS
20067 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
20070 if (!TARGET_PECOFF
&& flag_pic
)
20071 fprintf (file
, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
20073 x86_print_call_or_nop (file
, mcount_name
);
20077 #ifndef NO_PROFILE_COUNTERS
20078 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
20081 fprintf (file
, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
20085 #ifndef NO_PROFILE_COUNTERS
20086 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
20089 x86_print_call_or_nop (file
, mcount_name
);
20092 if (flag_record_mcount
20093 || lookup_attribute ("fentry_section",
20094 DECL_ATTRIBUTES (current_function_decl
)))
20096 const char *sname
= "__mcount_loc";
20098 if (current_fentry_section (&sname
))
20100 else if (fentry_section
)
20101 sname
= fentry_section
;
20103 fprintf (file
, "\t.section %s, \"a\",@progbits\n", sname
);
20104 fprintf (file
, "\t.%s 1b\n", TARGET_64BIT
? "quad" : "long");
20105 fprintf (file
, "\t.previous\n");
20109 /* We don't have exact information about the insn sizes, but we may assume
20110 quite safely that we are informed about all 1 byte insns and memory
20111 address sizes. This is enough to eliminate unnecessary padding in
20115 ix86_min_insn_size (rtx_insn
*insn
)
20119 if (!INSN_P (insn
) || !active_insn_p (insn
))
20122 /* Discard alignments we've emit and jump instructions. */
20123 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
20124 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
20127 /* Important case - calls are always 5 bytes.
20128 It is common to have many calls in the row. */
20130 && symbolic_reference_mentioned_p (PATTERN (insn
))
20131 && !SIBLING_CALL_P (insn
))
20133 len
= get_attr_length (insn
);
20137 /* For normal instructions we rely on get_attr_length being exact,
20138 with a few exceptions. */
20139 if (!JUMP_P (insn
))
20141 enum attr_type type
= get_attr_type (insn
);
20146 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
20147 || asm_noperands (PATTERN (insn
)) >= 0)
20154 /* Otherwise trust get_attr_length. */
20158 l
= get_attr_length_address (insn
);
20159 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
20168 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
20170 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
20174 ix86_avoid_jump_mispredicts (void)
20176 rtx_insn
*insn
, *start
= get_insns ();
20177 int nbytes
= 0, njumps
= 0;
20178 bool isjump
= false;
20180 /* Look for all minimal intervals of instructions containing 4 jumps.
20181 The intervals are bounded by START and INSN. NBYTES is the total
20182 size of instructions in the interval including INSN and not including
20183 START. When the NBYTES is smaller than 16 bytes, it is possible
20184 that the end of START and INSN ends up in the same 16byte page.
20186 The smallest offset in the page INSN can start is the case where START
20187 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
20188 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
20190 Don't consider asm goto as jump, while it can contain a jump, it doesn't
20191 have to, control transfer to label(s) can be performed through other
20192 means, and also we estimate minimum length of all asm stmts as 0. */
20193 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
20197 if (LABEL_P (insn
))
20199 align_flags alignment
= label_to_alignment (insn
);
20200 int align
= alignment
.levels
[0].log
;
20201 int max_skip
= alignment
.levels
[0].maxskip
;
20205 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
20206 already in the current 16 byte page, because otherwise
20207 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
20208 bytes to reach 16 byte boundary. */
20210 || (align
<= 3 && max_skip
!= (1 << align
) - 1))
20213 fprintf (dump_file
, "Label %i with max_skip %i\n",
20214 INSN_UID (insn
), max_skip
);
20217 while (nbytes
+ max_skip
>= 16)
20219 start
= NEXT_INSN (start
);
20220 if ((JUMP_P (start
) && asm_noperands (PATTERN (start
)) < 0)
20222 njumps
--, isjump
= true;
20225 nbytes
-= ix86_min_insn_size (start
);
20231 min_size
= ix86_min_insn_size (insn
);
20232 nbytes
+= min_size
;
20234 fprintf (dump_file
, "Insn %i estimated to %i bytes\n",
20235 INSN_UID (insn
), min_size
);
20236 if ((JUMP_P (insn
) && asm_noperands (PATTERN (insn
)) < 0)
20244 start
= NEXT_INSN (start
);
20245 if ((JUMP_P (start
) && asm_noperands (PATTERN (start
)) < 0)
20247 njumps
--, isjump
= true;
20250 nbytes
-= ix86_min_insn_size (start
);
20252 gcc_assert (njumps
>= 0);
20254 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
20255 INSN_UID (start
), INSN_UID (insn
), nbytes
);
20257 if (njumps
== 3 && isjump
&& nbytes
< 16)
20259 int padsize
= 15 - nbytes
+ ix86_min_insn_size (insn
);
20262 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
20263 INSN_UID (insn
), padsize
);
20264 emit_insn_before (gen_pad (GEN_INT (padsize
)), insn
);
20270 /* AMD Athlon works faster
20271 when RET is not destination of conditional jump or directly preceded
20272 by other jump instruction. We avoid the penalty by inserting NOP just
20273 before the RET instructions in such cases. */
20275 ix86_pad_returns (void)
20280 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20282 basic_block bb
= e
->src
;
20283 rtx_insn
*ret
= BB_END (bb
);
20285 bool replace
= false;
20287 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
20288 || optimize_bb_for_size_p (bb
))
20290 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
20291 if (active_insn_p (prev
) || LABEL_P (prev
))
20293 if (prev
&& LABEL_P (prev
))
20298 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
20299 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
20300 && !(e
->flags
& EDGE_FALLTHRU
))
20308 prev
= prev_active_insn (ret
);
20310 && ((JUMP_P (prev
) && any_condjump_p (prev
))
20313 /* Empty functions get branch mispredict even when
20314 the jump destination is not visible to us. */
20315 if (!prev
&& !optimize_function_for_size_p (cfun
))
20320 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
20326 /* Count the minimum number of instructions in BB. Return 4 if the
20327 number of instructions >= 4. */
20330 ix86_count_insn_bb (basic_block bb
)
20333 int insn_count
= 0;
20335 /* Count number of instructions in this block. Return 4 if the number
20336 of instructions >= 4. */
20337 FOR_BB_INSNS (bb
, insn
)
20339 /* Only happen in exit blocks. */
20341 && ANY_RETURN_P (PATTERN (insn
)))
20344 if (NONDEBUG_INSN_P (insn
)
20345 && GET_CODE (PATTERN (insn
)) != USE
20346 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
20349 if (insn_count
>= 4)
20358 /* Count the minimum number of instructions in code path in BB.
20359 Return 4 if the number of instructions >= 4. */
20362 ix86_count_insn (basic_block bb
)
20366 int min_prev_count
;
20368 /* Only bother counting instructions along paths with no
20369 more than 2 basic blocks between entry and exit. Given
20370 that BB has an edge to exit, determine if a predecessor
20371 of BB has an edge from entry. If so, compute the number
20372 of instructions in the predecessor block. If there
20373 happen to be multiple such blocks, compute the minimum. */
20374 min_prev_count
= 4;
20375 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
20378 edge_iterator prev_ei
;
20380 if (e
->src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
))
20382 min_prev_count
= 0;
20385 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
20387 if (prev_e
->src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
))
20389 int count
= ix86_count_insn_bb (e
->src
);
20390 if (count
< min_prev_count
)
20391 min_prev_count
= count
;
20397 if (min_prev_count
< 4)
20398 min_prev_count
+= ix86_count_insn_bb (bb
);
20400 return min_prev_count
;
20403 /* Pad short function to 4 instructions. */
20406 ix86_pad_short_function (void)
20411 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20413 rtx_insn
*ret
= BB_END (e
->src
);
20414 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
20416 int insn_count
= ix86_count_insn (e
->src
);
20418 /* Pad short function. */
20419 if (insn_count
< 4)
20421 rtx_insn
*insn
= ret
;
20423 /* Find epilogue. */
20426 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
20427 insn
= PREV_INSN (insn
);
20432 /* Two NOPs count as one instruction. */
20433 insn_count
= 2 * (4 - insn_count
);
20434 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
20440 /* Fix up a Windows system unwinder issue. If an EH region falls through into
20441 the epilogue, the Windows system unwinder will apply epilogue logic and
20442 produce incorrect offsets. This can be avoided by adding a nop between
20443 the last insn that can throw and the first insn of the epilogue. */
20446 ix86_seh_fixup_eh_fallthru (void)
20451 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20453 rtx_insn
*insn
, *next
;
20455 /* Find the beginning of the epilogue. */
20456 for (insn
= BB_END (e
->src
); insn
!= NULL
; insn
= PREV_INSN (insn
))
20457 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_EPILOGUE_BEG
)
20462 /* We only care about preceding insns that can throw. */
20463 insn
= prev_active_insn (insn
);
20464 if (insn
== NULL
|| !can_throw_internal (insn
))
20467 /* Do not separate calls from their debug information. */
20468 for (next
= NEXT_INSN (insn
); next
!= NULL
; next
= NEXT_INSN (next
))
20469 if (NOTE_P (next
) && NOTE_KIND (next
) == NOTE_INSN_VAR_LOCATION
)
20474 emit_insn_after (gen_nops (const1_rtx
), insn
);
20478 /* Implement machine specific optimizations. We implement padding of returns
20479 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
20483 /* We are freeing block_for_insn in the toplev to keep compatibility
20484 with old MDEP_REORGS that are not CFG based. Recompute it now. */
20485 compute_bb_for_insn ();
20487 if (TARGET_SEH
&& current_function_has_exception_handlers ())
20488 ix86_seh_fixup_eh_fallthru ();
20490 if (optimize
&& optimize_function_for_speed_p (cfun
))
20492 if (TARGET_PAD_SHORT_FUNCTION
)
20493 ix86_pad_short_function ();
20494 else if (TARGET_PAD_RETURNS
)
20495 ix86_pad_returns ();
20496 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
20497 if (TARGET_FOUR_JUMP_LIMIT
)
20498 ix86_avoid_jump_mispredicts ();
20503 /* Return nonzero when QImode register that must be represented via REX prefix
20506 x86_extended_QIreg_mentioned_p (rtx_insn
*insn
)
20509 extract_insn_cached (insn
);
20510 for (i
= 0; i
< recog_data
.n_operands
; i
++)
20511 if (GENERAL_REG_P (recog_data
.operand
[i
])
20512 && !QI_REGNO_P (REGNO (recog_data
.operand
[i
])))
20517 /* Return true when INSN mentions register that must be encoded using REX
20520 x86_extended_reg_mentioned_p (rtx insn
)
20522 subrtx_iterator::array_type array
;
20523 FOR_EACH_SUBRTX (iter
, array
, INSN_P (insn
) ? PATTERN (insn
) : insn
, NONCONST
)
20525 const_rtx x
= *iter
;
20527 && (REX_INT_REGNO_P (REGNO (x
)) || REX_SSE_REGNO_P (REGNO (x
))))
20533 /* If profitable, negate (without causing overflow) integer constant
20534 of mode MODE at location LOC. Return true in this case. */
20536 x86_maybe_negate_const_int (rtx
*loc
, machine_mode mode
)
20540 if (!CONST_INT_P (*loc
))
20546 /* DImode x86_64 constants must fit in 32 bits. */
20547 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
20558 gcc_unreachable ();
20561 /* Avoid overflows. */
20562 if (mode_signbit_p (mode
, *loc
))
20565 val
= INTVAL (*loc
);
20567 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
20568 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
20569 if ((val
< 0 && val
!= -128)
20572 *loc
= GEN_INT (-val
);
20579 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20580 optabs would emit if we didn't have TFmode patterns. */
20583 x86_emit_floatuns (rtx operands
[2])
20585 rtx_code_label
*neglab
, *donelab
;
20586 rtx i0
, i1
, f0
, in
, out
;
20587 machine_mode mode
, inmode
;
20589 inmode
= GET_MODE (operands
[1]);
20590 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
20593 in
= force_reg (inmode
, operands
[1]);
20594 mode
= GET_MODE (out
);
20595 neglab
= gen_label_rtx ();
20596 donelab
= gen_label_rtx ();
20597 f0
= gen_reg_rtx (mode
);
20599 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
20601 expand_float (out
, in
, 0);
20603 emit_jump_insn (gen_jump (donelab
));
20606 emit_label (neglab
);
20608 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
20610 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
20612 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
20614 expand_float (f0
, i0
, 0);
20616 emit_insn (gen_rtx_SET (out
, gen_rtx_PLUS (mode
, f0
, f0
)));
20618 emit_label (donelab
);
20621 /* Target hook for scalar_mode_supported_p. */
20623 ix86_scalar_mode_supported_p (scalar_mode mode
)
20625 if (DECIMAL_FLOAT_MODE_P (mode
))
20626 return default_decimal_float_supported_p ();
20627 else if (mode
== TFmode
)
20630 return default_scalar_mode_supported_p (mode
);
20633 /* Implements target hook vector_mode_supported_p. */
20635 ix86_vector_mode_supported_p (machine_mode mode
)
20637 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
20639 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
20641 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
20643 if (TARGET_AVX512F
&& VALID_AVX512F_REG_MODE (mode
))
20645 if ((TARGET_MMX
|| TARGET_MMX_WITH_SSE
) && VALID_MMX_REG_MODE (mode
))
20647 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
20652 /* Target hook for c_mode_for_suffix. */
20653 static machine_mode
20654 ix86_c_mode_for_suffix (char suffix
)
20664 /* Worker function for TARGET_MD_ASM_ADJUST.
20666 We implement asm flag outputs, and maintain source compatibility
20667 with the old cc0-based compiler. */
20670 ix86_md_asm_adjust (vec
<rtx
> &outputs
, vec
<rtx
> &/*inputs*/,
20671 vec
<const char *> &constraints
,
20672 vec
<rtx
> &clobbers
, HARD_REG_SET
&clobbered_regs
)
20674 bool saw_asm_flag
= false;
20677 for (unsigned i
= 0, n
= outputs
.length (); i
< n
; ++i
)
20679 const char *con
= constraints
[i
];
20680 if (strncmp (con
, "=@cc", 4) != 0)
20683 if (strchr (con
, ',') != NULL
)
20685 error ("alternatives not allowed in %<asm%> flag output");
20689 bool invert
= false;
20691 invert
= true, con
++;
20693 machine_mode mode
= CCmode
;
20694 rtx_code code
= UNKNOWN
;
20700 mode
= CCAmode
, code
= EQ
;
20701 else if (con
[1] == 'e' && con
[2] == 0)
20702 mode
= CCCmode
, code
= NE
;
20706 mode
= CCCmode
, code
= EQ
;
20707 else if (con
[1] == 'e' && con
[2] == 0)
20708 mode
= CCAmode
, code
= NE
;
20712 mode
= CCCmode
, code
= EQ
;
20716 mode
= CCZmode
, code
= EQ
;
20720 mode
= CCGCmode
, code
= GT
;
20721 else if (con
[1] == 'e' && con
[2] == 0)
20722 mode
= CCGCmode
, code
= GE
;
20726 mode
= CCGCmode
, code
= LT
;
20727 else if (con
[1] == 'e' && con
[2] == 0)
20728 mode
= CCGCmode
, code
= LE
;
20732 mode
= CCOmode
, code
= EQ
;
20736 mode
= CCPmode
, code
= EQ
;
20740 mode
= CCSmode
, code
= EQ
;
20744 mode
= CCZmode
, code
= EQ
;
20747 if (code
== UNKNOWN
)
20749 error ("unknown %<asm%> flag output %qs", constraints
[i
]);
20753 code
= reverse_condition (code
);
20755 rtx dest
= outputs
[i
];
20758 /* This is the first asm flag output. Here we put the flags
20759 register in as the real output and adjust the condition to
20761 constraints
[i
] = "=Bf";
20762 outputs
[i
] = gen_rtx_REG (CCmode
, FLAGS_REG
);
20763 saw_asm_flag
= true;
20767 /* We don't need the flags register as output twice. */
20768 constraints
[i
] = "=X";
20769 outputs
[i
] = gen_rtx_SCRATCH (SImode
);
20772 rtx x
= gen_rtx_REG (mode
, FLAGS_REG
);
20773 x
= gen_rtx_fmt_ee (code
, QImode
, x
, const0_rtx
);
20775 machine_mode dest_mode
= GET_MODE (dest
);
20776 if (!SCALAR_INT_MODE_P (dest_mode
))
20778 error ("invalid type for %<asm%> flag output");
20782 if (dest_mode
== DImode
&& !TARGET_64BIT
)
20783 dest_mode
= SImode
;
20785 if (dest_mode
!= QImode
)
20787 rtx destqi
= gen_reg_rtx (QImode
);
20788 emit_insn (gen_rtx_SET (destqi
, x
));
20790 if (TARGET_ZERO_EXTEND_WITH_AND
20791 && optimize_function_for_speed_p (cfun
))
20793 x
= force_reg (dest_mode
, const0_rtx
);
20795 emit_insn (gen_movstrictqi
20796 (gen_lowpart (QImode
, x
), destqi
));
20799 x
= gen_rtx_ZERO_EXTEND (dest_mode
, destqi
);
20802 if (dest_mode
!= GET_MODE (dest
))
20804 rtx tmp
= gen_reg_rtx (SImode
);
20806 emit_insn (gen_rtx_SET (tmp
, x
));
20807 emit_insn (gen_zero_extendsidi2 (dest
, tmp
));
20810 emit_insn (gen_rtx_SET (dest
, x
));
20812 rtx_insn
*seq
= get_insns ();
20819 /* If we had no asm flag outputs, clobber the flags. */
20820 clobbers
.safe_push (gen_rtx_REG (CCmode
, FLAGS_REG
));
20821 SET_HARD_REG_BIT (clobbered_regs
, FLAGS_REG
);
20826 /* Implements target vector targetm.asm.encode_section_info. */
20828 static void ATTRIBUTE_UNUSED
20829 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
20831 default_encode_section_info (decl
, rtl
, first
);
20833 if (ix86_in_large_data_p (decl
))
20834 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
20837 /* Worker function for REVERSE_CONDITION. */
20840 ix86_reverse_condition (enum rtx_code code
, machine_mode mode
)
20842 return (mode
== CCFPmode
20843 ? reverse_condition_maybe_unordered (code
)
20844 : reverse_condition (code
));
20847 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20851 output_387_reg_move (rtx_insn
*insn
, rtx
*operands
)
20853 if (REG_P (operands
[0]))
20855 if (REG_P (operands
[1])
20856 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
20858 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
20859 return output_387_ffreep (operands
, 0);
20860 return "fstp\t%y0";
20862 if (STACK_TOP_P (operands
[0]))
20863 return "fld%Z1\t%y1";
20866 else if (MEM_P (operands
[0]))
20868 gcc_assert (REG_P (operands
[1]));
20869 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
20870 return "fstp%Z0\t%y0";
20873 /* There is no non-popping store to memory for XFmode.
20874 So if we need one, follow the store with a load. */
20875 if (GET_MODE (operands
[0]) == XFmode
)
20876 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
20878 return "fst%Z0\t%y0";
20884 #ifdef TARGET_SOLARIS
20885 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20888 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
20891 /* With Binutils 2.15, the "@unwind" marker must be specified on
20892 every occurrence of the ".eh_frame" section, not just the first
20895 && strcmp (name
, ".eh_frame") == 0)
20897 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
20898 flags
& SECTION_WRITE
? "aw" : "a");
20903 if (HAVE_COMDAT_GROUP
&& flags
& SECTION_LINKONCE
)
20905 solaris_elf_asm_comdat_section (name
, flags
, decl
);
20909 /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
20910 SPARC assembler. One cannot mix single-letter flags and #exclude, so
20911 only emit the latter here. */
20912 if (flags
& SECTION_EXCLUDE
)
20914 fprintf (asm_out_file
, "\t.section\t%s,#exclude\n", name
);
20919 default_elf_asm_named_section (name
, flags
, decl
);
20921 #endif /* TARGET_SOLARIS */
20923 /* Return the mangling of TYPE if it is an extended fundamental type. */
20925 static const char *
20926 ix86_mangle_type (const_tree type
)
20928 type
= TYPE_MAIN_VARIANT (type
);
20930 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
20931 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
20934 switch (TYPE_MODE (type
))
20937 /* __float128 is "g". */
20940 /* "long double" or __float80 is "e". */
20947 static GTY(()) tree ix86_tls_stack_chk_guard_decl
;
20950 ix86_stack_protect_guard (void)
20952 if (TARGET_SSP_TLS_GUARD
)
20954 tree type_node
= lang_hooks
.types
.type_for_mode (ptr_mode
, 1);
20955 int qual
= ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg
);
20956 tree type
= build_qualified_type (type_node
, qual
);
20959 if (global_options_set
.x_ix86_stack_protector_guard_symbol_str
)
20961 t
= ix86_tls_stack_chk_guard_decl
;
20968 (UNKNOWN_LOCATION
, VAR_DECL
,
20969 get_identifier (ix86_stack_protector_guard_symbol_str
),
20971 TREE_STATIC (t
) = 1;
20972 TREE_PUBLIC (t
) = 1;
20973 DECL_EXTERNAL (t
) = 1;
20975 TREE_THIS_VOLATILE (t
) = 1;
20976 DECL_ARTIFICIAL (t
) = 1;
20977 DECL_IGNORED_P (t
) = 1;
20979 /* Do not share RTL as the declaration is visible outside of
20980 current function. */
20982 RTX_FLAG (x
, used
) = 1;
20984 ix86_tls_stack_chk_guard_decl
= t
;
20989 tree asptrtype
= build_pointer_type (type
);
20991 t
= build_int_cst (asptrtype
, ix86_stack_protector_guard_offset
);
20992 t
= build2 (MEM_REF
, asptrtype
, t
,
20993 build_int_cst (asptrtype
, 0));
20994 TREE_THIS_VOLATILE (t
) = 1;
21000 return default_stack_protect_guard ();
21003 /* For 32-bit code we can save PIC register setup by using
21004 __stack_chk_fail_local hidden function instead of calling
21005 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
21006 register, so it is better to call __stack_chk_fail directly. */
21008 static tree ATTRIBUTE_UNUSED
21009 ix86_stack_protect_fail (void)
21011 return TARGET_64BIT
21012 ? default_external_stack_protect_fail ()
21013 : default_hidden_stack_protect_fail ();
21016 /* Select a format to encode pointers in exception handling data. CODE
21017 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21018 true if the symbol may be affected by dynamic relocations.
21020 ??? All x86 object file formats are capable of representing this.
21021 After all, the relocation needed is the same as for the call insn.
21022 Whether or not a particular assembler allows us to enter such, I
21023 guess we'll have to see. */
21025 asm_preferred_eh_data_format (int code
, int global
)
21029 int type
= DW_EH_PE_sdata8
;
21031 || ix86_cmodel
== CM_SMALL_PIC
21032 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
21033 type
= DW_EH_PE_sdata4
;
21034 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
21036 if (ix86_cmodel
== CM_SMALL
21037 || (ix86_cmodel
== CM_MEDIUM
&& code
))
21038 return DW_EH_PE_udata4
;
21039 return DW_EH_PE_absptr
;
21042 /* Implement targetm.vectorize.builtin_vectorization_cost. */
21044 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
21048 machine_mode mode
= TImode
;
21050 if (vectype
!= NULL
)
21052 fp
= FLOAT_TYPE_P (vectype
);
21053 mode
= TYPE_MODE (vectype
);
21056 switch (type_of_cost
)
21059 return fp
? ix86_cost
->addss
: COSTS_N_INSNS (1);
21062 /* load/store costs are relative to register move which is 2. Recompute
21063 it to COSTS_N_INSNS so everything have same base. */
21064 return COSTS_N_INSNS (fp
? ix86_cost
->sse_load
[0]
21065 : ix86_cost
->int_load
[2]) / 2;
21068 return COSTS_N_INSNS (fp
? ix86_cost
->sse_store
[0]
21069 : ix86_cost
->int_store
[2]) / 2;
21072 return ix86_vec_cost (mode
,
21073 fp
? ix86_cost
->addss
: ix86_cost
->sse_op
);
21076 index
= sse_store_index (mode
);
21077 /* See PR82713 - we may end up being called on non-vector type. */
21080 return COSTS_N_INSNS (ix86_cost
->sse_load
[index
]) / 2;
21083 index
= sse_store_index (mode
);
21084 /* See PR82713 - we may end up being called on non-vector type. */
21087 return COSTS_N_INSNS (ix86_cost
->sse_store
[index
]) / 2;
21089 case vec_to_scalar
:
21090 case scalar_to_vec
:
21091 return ix86_vec_cost (mode
, ix86_cost
->sse_op
);
21093 /* We should have separate costs for unaligned loads and gather/scatter.
21094 Do that incrementally. */
21095 case unaligned_load
:
21096 index
= sse_store_index (mode
);
21097 /* See PR82713 - we may end up being called on non-vector type. */
21100 return COSTS_N_INSNS (ix86_cost
->sse_unaligned_load
[index
]) / 2;
21102 case unaligned_store
:
21103 index
= sse_store_index (mode
);
21104 /* See PR82713 - we may end up being called on non-vector type. */
21107 return COSTS_N_INSNS (ix86_cost
->sse_unaligned_store
[index
]) / 2;
21109 case vector_gather_load
:
21110 return ix86_vec_cost (mode
,
21112 (ix86_cost
->gather_static
21113 + ix86_cost
->gather_per_elt
21114 * TYPE_VECTOR_SUBPARTS (vectype
)) / 2);
21116 case vector_scatter_store
:
21117 return ix86_vec_cost (mode
,
21119 (ix86_cost
->scatter_static
21120 + ix86_cost
->scatter_per_elt
21121 * TYPE_VECTOR_SUBPARTS (vectype
)) / 2);
21123 case cond_branch_taken
:
21124 return ix86_cost
->cond_taken_branch_cost
;
21126 case cond_branch_not_taken
:
21127 return ix86_cost
->cond_not_taken_branch_cost
;
21130 case vec_promote_demote
:
21131 return ix86_vec_cost (mode
, ix86_cost
->sse_op
);
21133 case vec_construct
:
21135 /* N element inserts into SSE vectors. */
21136 int cost
= TYPE_VECTOR_SUBPARTS (vectype
) * ix86_cost
->sse_op
;
21137 /* One vinserti128 for combining two SSE vectors for AVX256. */
21138 if (GET_MODE_BITSIZE (mode
) == 256)
21139 cost
+= ix86_vec_cost (mode
, ix86_cost
->addss
);
21140 /* One vinserti64x4 and two vinserti128 for combining SSE
21141 and AVX256 vectors to AVX512. */
21142 else if (GET_MODE_BITSIZE (mode
) == 512)
21143 cost
+= 3 * ix86_vec_cost (mode
, ix86_cost
->addss
);
21148 gcc_unreachable ();
21153 /* This function returns the calling abi specific va_list type node.
21154 It returns the FNDECL specific va_list type. */
21157 ix86_fn_abi_va_list (tree fndecl
)
21160 return va_list_type_node
;
21161 gcc_assert (fndecl
!= NULL_TREE
);
21163 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
21164 return ms_va_list_type_node
;
21166 return sysv_va_list_type_node
;
21169 /* Returns the canonical va_list type specified by TYPE. If there
21170 is no valid TYPE provided, it return NULL_TREE. */
21173 ix86_canonical_va_list_type (tree type
)
21177 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type
)))
21178 return ms_va_list_type_node
;
21180 if ((TREE_CODE (type
) == ARRAY_TYPE
21181 && integer_zerop (array_type_nelts (type
)))
21182 || POINTER_TYPE_P (type
))
21184 tree elem_type
= TREE_TYPE (type
);
21185 if (TREE_CODE (elem_type
) == RECORD_TYPE
21186 && lookup_attribute ("sysv_abi va_list",
21187 TYPE_ATTRIBUTES (elem_type
)))
21188 return sysv_va_list_type_node
;
21194 return std_canonical_va_list_type (type
);
21197 /* Iterate through the target-specific builtin types for va_list.
21198 IDX denotes the iterator, *PTREE is set to the result type of
21199 the va_list builtin, and *PNAME to its internal type.
21200 Returns zero if there is no element for this index, otherwise
21201 IDX should be increased upon the next call.
21202 Note, do not iterate a base builtin's name like __builtin_va_list.
21203 Used from c_common_nodes_and_builtins. */
21206 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
21216 *ptree
= ms_va_list_type_node
;
21217 *pname
= "__builtin_ms_va_list";
21221 *ptree
= sysv_va_list_type_node
;
21222 *pname
= "__builtin_sysv_va_list";
21230 #undef TARGET_SCHED_DISPATCH
21231 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
21232 #undef TARGET_SCHED_DISPATCH_DO
21233 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
21234 #undef TARGET_SCHED_REASSOCIATION_WIDTH
21235 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
21236 #undef TARGET_SCHED_REORDER
21237 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
21238 #undef TARGET_SCHED_ADJUST_PRIORITY
21239 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
21240 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
21241 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
21242 ix86_dependencies_evaluation_hook
21245 /* Implementation of reassociation_width target hook used by
21246 reassoc phase to identify parallelism level in reassociated
21247 tree. Statements tree_code is passed in OPC. Arguments type
21248 is passed in MODE. */
21251 ix86_reassociation_width (unsigned int op
, machine_mode mode
)
21255 if (VECTOR_MODE_P (mode
))
21258 if (INTEGRAL_MODE_P (mode
))
21259 width
= ix86_cost
->reassoc_vec_int
;
21260 else if (FLOAT_MODE_P (mode
))
21261 width
= ix86_cost
->reassoc_vec_fp
;
21266 /* Integer vector instructions execute in FP unit
21267 and can execute 3 additions and one multiplication per cycle. */
21268 if ((ix86_tune
== PROCESSOR_ZNVER1
|| ix86_tune
== PROCESSOR_ZNVER2
)
21269 && INTEGRAL_MODE_P (mode
) && op
!= PLUS
&& op
!= MINUS
)
21272 /* Account for targets that splits wide vectors into multiple parts. */
21273 if (TARGET_AVX128_OPTIMAL
&& GET_MODE_BITSIZE (mode
) > 128)
21274 div
= GET_MODE_BITSIZE (mode
) / 128;
21275 else if (TARGET_SSE_SPLIT_REGS
&& GET_MODE_BITSIZE (mode
) > 64)
21276 div
= GET_MODE_BITSIZE (mode
) / 64;
21277 width
= (width
+ div
- 1) / div
;
21280 else if (INTEGRAL_MODE_P (mode
))
21281 width
= ix86_cost
->reassoc_int
;
21282 else if (FLOAT_MODE_P (mode
))
21283 width
= ix86_cost
->reassoc_fp
;
21285 /* Avoid using too many registers in 32bit mode. */
21286 if (!TARGET_64BIT
&& width
> 2)
21291 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
21292 place emms and femms instructions. */
21294 static machine_mode
21295 ix86_preferred_simd_mode (scalar_mode mode
)
21303 if (TARGET_AVX512BW
&& !TARGET_PREFER_AVX256
)
21305 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
21311 if (TARGET_AVX512BW
&& !TARGET_PREFER_AVX256
)
21313 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
21319 if (TARGET_AVX512F
&& !TARGET_PREFER_AVX256
)
21321 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
21327 if (TARGET_AVX512F
&& !TARGET_PREFER_AVX256
)
21329 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
21335 if (TARGET_AVX512F
&& !TARGET_PREFER_AVX256
)
21337 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
21343 if (TARGET_AVX512F
&& !TARGET_PREFER_AVX256
)
21345 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
21347 else if (TARGET_SSE2
)
21356 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
21357 vectors. If AVX512F is enabled then try vectorizing with 512bit,
21358 256bit and 128bit vectors. */
21361 ix86_autovectorize_vector_sizes (vector_sizes
*sizes
, bool all
)
21363 if (TARGET_AVX512F
&& !TARGET_PREFER_AVX256
)
21365 sizes
->safe_push (64);
21366 sizes
->safe_push (32);
21367 sizes
->safe_push (16);
21369 else if (TARGET_AVX512F
&& all
)
21371 sizes
->safe_push (32);
21372 sizes
->safe_push (16);
21373 sizes
->safe_push (64);
21375 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
21377 sizes
->safe_push (32);
21378 sizes
->safe_push (16);
21380 else if (TARGET_AVX
&& all
)
21382 sizes
->safe_push (16);
21383 sizes
->safe_push (32);
21387 /* Implemenation of targetm.vectorize.get_mask_mode. */
21389 static opt_machine_mode
21390 ix86_get_mask_mode (poly_uint64 nunits
, poly_uint64 vector_size
)
21392 unsigned elem_size
= vector_size
/ nunits
;
21394 /* Scalar mask case. */
21395 if ((TARGET_AVX512F
&& vector_size
== 64)
21396 || (TARGET_AVX512VL
&& (vector_size
== 32 || vector_size
== 16)))
21398 if (elem_size
== 4 || elem_size
== 8 || TARGET_AVX512BW
)
21399 return smallest_int_mode_for_size (nunits
);
21402 scalar_int_mode elem_mode
21403 = smallest_int_mode_for_size (elem_size
* BITS_PER_UNIT
);
21405 gcc_assert (elem_size
* nunits
== vector_size
);
21407 return mode_for_vector (elem_mode
, nunits
);
21412 /* Return class of registers which could be used for pseudo of MODE
21413 and of class RCLASS for spilling instead of memory. Return NO_REGS
21414 if it is not possible or non-profitable. */
21416 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
21419 ix86_spill_class (reg_class_t rclass
, machine_mode mode
)
21421 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
21423 && TARGET_INTER_UNIT_MOVES_TO_VEC
21424 && TARGET_INTER_UNIT_MOVES_FROM_VEC
21425 && (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
))
21426 && INTEGER_CLASS_P (rclass
))
21427 return ALL_SSE_REGS
;
21431 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
21432 but returns a lower bound. */
21434 static unsigned int
21435 ix86_max_noce_ifcvt_seq_cost (edge e
)
21437 bool predictable_p
= predictable_edge_p (e
);
21439 enum compiler_param param
21441 ? PARAM_MAX_RTL_IF_CONVERSION_PREDICTABLE_COST
21442 : PARAM_MAX_RTL_IF_CONVERSION_UNPREDICTABLE_COST
);
21444 /* If we have a parameter set, use that, otherwise take a guess using
21446 if (global_options_set
.x_param_values
[param
])
21447 return PARAM_VALUE (param
);
21449 return BRANCH_COST (true, predictable_p
) * COSTS_N_INSNS (2);
21452 /* Return true if SEQ is a good candidate as a replacement for the
21453 if-convertible sequence described in IF_INFO. */
21456 ix86_noce_conversion_profitable_p (rtx_insn
*seq
, struct noce_if_info
*if_info
)
21458 if (TARGET_ONE_IF_CONV_INSN
&& if_info
->speed_p
)
21461 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
21462 Maybe we should allow even more conditional moves as long as they
21463 are used far enough not to stall the CPU, or also consider
21464 IF_INFO->TEST_BB succ edge probabilities. */
21465 for (rtx_insn
*insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
21467 rtx set
= single_set (insn
);
21470 if (GET_CODE (SET_SRC (set
)) != IF_THEN_ELSE
)
21472 rtx src
= SET_SRC (set
);
21473 machine_mode mode
= GET_MODE (src
);
21474 if (GET_MODE_CLASS (mode
) != MODE_INT
21475 && GET_MODE_CLASS (mode
) != MODE_FLOAT
)
21477 if ((!REG_P (XEXP (src
, 1)) && !MEM_P (XEXP (src
, 1)))
21478 || (!REG_P (XEXP (src
, 2)) && !MEM_P (XEXP (src
, 2))))
21480 /* insn is CMOV or FCMOV. */
21481 if (++cmov_cnt
> 1)
21485 return default_noce_conversion_profitable_p (seq
, if_info
);
21488 /* Implement targetm.vectorize.init_cost. */
21491 ix86_init_cost (struct loop
*)
21493 unsigned *cost
= XNEWVEC (unsigned, 3);
21494 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
21498 /* Implement targetm.vectorize.add_stmt_cost. */
21501 ix86_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
21502 struct _stmt_vec_info
*stmt_info
, int misalign
,
21503 enum vect_cost_model_location where
)
21505 unsigned *cost
= (unsigned *) data
;
21506 unsigned retval
= 0;
21508 = (kind
== scalar_stmt
|| kind
== scalar_load
|| kind
== scalar_store
);
21510 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
21511 int stmt_cost
= - 1;
21514 machine_mode mode
= scalar_p
? SImode
: TImode
;
21516 if (vectype
!= NULL
)
21518 fp
= FLOAT_TYPE_P (vectype
);
21519 mode
= TYPE_MODE (vectype
);
21521 mode
= TYPE_MODE (TREE_TYPE (vectype
));
21524 if ((kind
== vector_stmt
|| kind
== scalar_stmt
)
21526 && stmt_info
->stmt
&& gimple_code (stmt_info
->stmt
) == GIMPLE_ASSIGN
)
21528 tree_code subcode
= gimple_assign_rhs_code (stmt_info
->stmt
);
21529 /*machine_mode inner_mode = mode;
21530 if (VECTOR_MODE_P (mode))
21531 inner_mode = GET_MODE_INNER (mode);*/
21536 case POINTER_PLUS_EXPR
:
21538 if (kind
== scalar_stmt
)
21540 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
21541 stmt_cost
= ix86_cost
->addss
;
21542 else if (X87_FLOAT_MODE_P (mode
))
21543 stmt_cost
= ix86_cost
->fadd
;
21545 stmt_cost
= ix86_cost
->add
;
21548 stmt_cost
= ix86_vec_cost (mode
, fp
? ix86_cost
->addss
21549 : ix86_cost
->sse_op
);
21553 case WIDEN_MULT_EXPR
:
21554 case MULT_HIGHPART_EXPR
:
21555 stmt_cost
= ix86_multiplication_cost (ix86_cost
, mode
);
21558 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
21559 stmt_cost
= ix86_cost
->sse_op
;
21560 else if (X87_FLOAT_MODE_P (mode
))
21561 stmt_cost
= ix86_cost
->fchs
;
21562 else if (VECTOR_MODE_P (mode
))
21563 stmt_cost
= ix86_vec_cost (mode
, ix86_cost
->sse_op
);
21565 stmt_cost
= ix86_cost
->add
;
21567 case TRUNC_DIV_EXPR
:
21568 case CEIL_DIV_EXPR
:
21569 case FLOOR_DIV_EXPR
:
21570 case ROUND_DIV_EXPR
:
21571 case TRUNC_MOD_EXPR
:
21572 case CEIL_MOD_EXPR
:
21573 case FLOOR_MOD_EXPR
:
21575 case ROUND_MOD_EXPR
:
21576 case EXACT_DIV_EXPR
:
21577 stmt_cost
= ix86_division_cost (ix86_cost
, mode
);
21585 tree op2
= gimple_assign_rhs2 (stmt_info
->stmt
);
21586 stmt_cost
= ix86_shift_rotate_cost
21588 TREE_CODE (op2
) == INTEGER_CST
,
21589 cst_and_fits_in_hwi (op2
) ? int_cst_value (op2
) : -1,
21590 true, false, false, NULL
, NULL
);
21594 /* Only sign-conversions are free. */
21595 if (tree_nop_conversion_p
21596 (TREE_TYPE (gimple_assign_lhs (stmt_info
->stmt
)),
21597 TREE_TYPE (gimple_assign_rhs1 (stmt_info
->stmt
))))
21609 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
21610 stmt_cost
= ix86_cost
->sse_op
;
21611 else if (VECTOR_MODE_P (mode
))
21612 stmt_cost
= ix86_vec_cost (mode
, ix86_cost
->sse_op
);
21614 stmt_cost
= ix86_cost
->add
;
21622 if ((kind
== vector_stmt
|| kind
== scalar_stmt
)
21625 && (cfn
= gimple_call_combined_fn (stmt_info
->stmt
)) != CFN_LAST
)
21629 stmt_cost
= ix86_vec_cost (mode
,
21630 mode
== SFmode
? ix86_cost
->fmass
21631 : ix86_cost
->fmasd
);
21637 /* If we do elementwise loads into a vector then we are bound by
21638 latency and execution resources for the many scalar loads
21639 (AGU and load ports). Try to account for this by scaling the
21640 construction cost by the number of elements involved. */
21641 if ((kind
== vec_construct
|| kind
== vec_to_scalar
)
21643 && (STMT_VINFO_TYPE (stmt_info
) == load_vec_info_type
21644 || STMT_VINFO_TYPE (stmt_info
) == store_vec_info_type
)
21645 && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) == VMAT_ELEMENTWISE
21646 && TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info
))) != INTEGER_CST
)
21648 stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
21649 stmt_cost
*= (TYPE_VECTOR_SUBPARTS (vectype
) + 1);
21651 if (stmt_cost
== -1)
21652 stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
21654 /* Penalize DFmode vector operations for Bonnell. */
21655 if (TARGET_BONNELL
&& kind
== vector_stmt
21656 && vectype
&& GET_MODE_INNER (TYPE_MODE (vectype
)) == DFmode
)
21657 stmt_cost
*= 5; /* FIXME: The value here is arbitrary. */
21659 /* Statements in an inner loop relative to the loop being
21660 vectorized are weighted more heavily. The value here is
21661 arbitrary and could potentially be improved with analysis. */
21662 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
21663 count
*= 50; /* FIXME. */
21665 retval
= (unsigned) (count
* stmt_cost
);
21667 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
21668 for Silvermont as it has out of order integer pipeline and can execute
21669 2 scalar instruction per tick, but has in order SIMD pipeline. */
21670 if ((TARGET_SILVERMONT
|| TARGET_GOLDMONT
|| TARGET_GOLDMONT_PLUS
21671 || TARGET_TREMONT
|| TARGET_INTEL
) && stmt_info
&& stmt_info
->stmt
)
21673 tree lhs_op
= gimple_get_lhs (stmt_info
->stmt
);
21674 if (lhs_op
&& TREE_CODE (TREE_TYPE (lhs_op
)) == INTEGER_TYPE
)
21675 retval
= (retval
* 17) / 10;
21678 cost
[where
] += retval
;
21683 /* Implement targetm.vectorize.finish_cost. */
21686 ix86_finish_cost (void *data
, unsigned *prologue_cost
,
21687 unsigned *body_cost
, unsigned *epilogue_cost
)
21689 unsigned *cost
= (unsigned *) data
;
21690 *prologue_cost
= cost
[vect_prologue
];
21691 *body_cost
= cost
[vect_body
];
21692 *epilogue_cost
= cost
[vect_epilogue
];
21695 /* Implement targetm.vectorize.destroy_cost_data. */
21698 ix86_destroy_cost_data (void *data
)
21703 /* Validate target specific memory model bits in VAL. */
21705 static unsigned HOST_WIDE_INT
21706 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
21708 enum memmodel model
= memmodel_from_int (val
);
21711 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
21713 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
21715 warning (OPT_Winvalid_memory_model
,
21716 "unknown architecture specific memory model");
21717 return MEMMODEL_SEQ_CST
;
21719 strong
= (is_mm_acq_rel (model
) || is_mm_seq_cst (model
));
21720 if (val
& IX86_HLE_ACQUIRE
&& !(is_mm_acquire (model
) || strong
))
21722 warning (OPT_Winvalid_memory_model
,
21723 "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
21725 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
21727 if (val
& IX86_HLE_RELEASE
&& !(is_mm_release (model
) || strong
))
21729 warning (OPT_Winvalid_memory_model
,
21730 "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
21732 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
21737 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
21738 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
21739 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
21740 or number of vecsize_mangle variants that should be emitted. */
21743 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node
*node
,
21744 struct cgraph_simd_clone
*clonei
,
21745 tree base_type
, int num
)
21749 if (clonei
->simdlen
21750 && (clonei
->simdlen
< 2
21751 || clonei
->simdlen
> 1024
21752 || (clonei
->simdlen
& (clonei
->simdlen
- 1)) != 0))
21754 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
21755 "unsupported simdlen %d", clonei
->simdlen
);
21759 tree ret_type
= TREE_TYPE (TREE_TYPE (node
->decl
));
21760 if (TREE_CODE (ret_type
) != VOID_TYPE
)
21761 switch (TYPE_MODE (ret_type
))
21769 /* case E_SCmode: */
21770 /* case E_DCmode: */
21771 if (!AGGREGATE_TYPE_P (ret_type
))
21775 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
21776 "unsupported return type %qT for simd", ret_type
);
21782 tree type_arg_types
= TYPE_ARG_TYPES (TREE_TYPE (node
->decl
));
21783 bool decl_arg_p
= (node
->definition
|| type_arg_types
== NULL_TREE
);
21785 for (t
= (decl_arg_p
? DECL_ARGUMENTS (node
->decl
) : type_arg_types
), i
= 0;
21786 t
&& t
!= void_list_node
; t
= TREE_CHAIN (t
), i
++)
21788 tree arg_type
= decl_arg_p
? TREE_TYPE (t
) : TREE_VALUE (t
);
21789 switch (TYPE_MODE (arg_type
))
21797 /* case E_SCmode: */
21798 /* case E_DCmode: */
21799 if (!AGGREGATE_TYPE_P (arg_type
))
21803 if (clonei
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_UNIFORM
)
21805 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
21806 "unsupported argument type %qT for simd", arg_type
);
21811 if (!TREE_PUBLIC (node
->decl
))
21813 /* If the function isn't exported, we can pick up just one ISA
21815 if (TARGET_AVX512F
)
21816 clonei
->vecsize_mangle
= 'e';
21817 else if (TARGET_AVX2
)
21818 clonei
->vecsize_mangle
= 'd';
21819 else if (TARGET_AVX
)
21820 clonei
->vecsize_mangle
= 'c';
21822 clonei
->vecsize_mangle
= 'b';
21827 clonei
->vecsize_mangle
= "bcde"[num
];
21830 clonei
->mask_mode
= VOIDmode
;
21831 switch (clonei
->vecsize_mangle
)
21834 clonei
->vecsize_int
= 128;
21835 clonei
->vecsize_float
= 128;
21838 clonei
->vecsize_int
= 128;
21839 clonei
->vecsize_float
= 256;
21842 clonei
->vecsize_int
= 256;
21843 clonei
->vecsize_float
= 256;
21846 clonei
->vecsize_int
= 512;
21847 clonei
->vecsize_float
= 512;
21848 if (TYPE_MODE (base_type
) == QImode
)
21849 clonei
->mask_mode
= DImode
;
21851 clonei
->mask_mode
= SImode
;
21854 if (clonei
->simdlen
== 0)
21856 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type
)))
21857 clonei
->simdlen
= clonei
->vecsize_int
;
21859 clonei
->simdlen
= clonei
->vecsize_float
;
21860 clonei
->simdlen
/= GET_MODE_BITSIZE (TYPE_MODE (base_type
));
21862 else if (clonei
->simdlen
> 16)
21864 /* For compatibility with ICC, use the same upper bounds
21865 for simdlen. In particular, for CTYPE below, use the return type,
21866 unless the function returns void, in that case use the characteristic
21867 type. If it is possible for given SIMDLEN to pass CTYPE value
21868 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
21869 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
21870 emit corresponding clone. */
21871 tree ctype
= ret_type
;
21872 if (TREE_CODE (ret_type
) == VOID_TYPE
)
21874 int cnt
= GET_MODE_BITSIZE (TYPE_MODE (ctype
)) * clonei
->simdlen
;
21875 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype
)))
21876 cnt
/= clonei
->vecsize_int
;
21878 cnt
/= clonei
->vecsize_float
;
21879 if (cnt
> (TARGET_64BIT
? 16 : 8))
21881 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
21882 "unsupported simdlen %d", clonei
->simdlen
);
21889 /* If SIMD clone NODE can't be used in a vectorized loop
21890 in current function, return -1, otherwise return a badness of using it
21891 (0 if it is most desirable from vecsize_mangle point of view, 1
21892 slightly less desirable, etc.). */
21895 ix86_simd_clone_usable (struct cgraph_node
*node
)
21897 switch (node
->simdclone
->vecsize_mangle
)
21904 return TARGET_AVX2
? 2 : 1;
21908 return TARGET_AVX2
? 1 : 0;
21914 if (!TARGET_AVX512F
)
21918 gcc_unreachable ();
21922 /* This function adjusts the unroll factor based on
21923 the hardware capabilities. For ex, bdver3 has
21924 a loop buffer which makes unrolling of smaller
21925 loops less important. This function decides the
21926 unroll factor using number of memory references
21927 (value 32 is used) as a heuristic. */
21930 ix86_loop_unroll_adjust (unsigned nunroll
, struct loop
*loop
)
21935 unsigned mem_count
= 0;
21937 if (!TARGET_ADJUST_UNROLL
)
21940 /* Count the number of memory references within the loop body.
21941 This value determines the unrolling factor for bdver3 and bdver4
21943 subrtx_iterator::array_type array
;
21944 bbs
= get_loop_body (loop
);
21945 for (i
= 0; i
< loop
->num_nodes
; i
++)
21946 FOR_BB_INSNS (bbs
[i
], insn
)
21947 if (NONDEBUG_INSN_P (insn
))
21948 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), NONCONST
)
21949 if (const_rtx x
= *iter
)
21952 machine_mode mode
= GET_MODE (x
);
21953 unsigned int n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
21961 if (mem_count
&& mem_count
<=32)
21962 return MIN (nunroll
, 32 / mem_count
);
21968 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
21971 ix86_float_exceptions_rounding_supported_p (void)
21973 /* For x87 floating point with standard excess precision handling,
21974 there is no adddf3 pattern (since x87 floating point only has
21975 XFmode operations) so the default hook implementation gets this
21977 return TARGET_80387
|| (TARGET_SSE
&& TARGET_SSE_MATH
);
21980 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
21983 ix86_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
21985 if (!TARGET_80387
&& !(TARGET_SSE
&& TARGET_SSE_MATH
))
21987 tree exceptions_var
= create_tmp_var_raw (integer_type_node
);
21990 tree fenv_index_type
= build_index_type (size_int (6));
21991 tree fenv_type
= build_array_type (unsigned_type_node
, fenv_index_type
);
21992 tree fenv_var
= create_tmp_var_raw (fenv_type
);
21993 TREE_ADDRESSABLE (fenv_var
) = 1;
21994 tree fenv_ptr
= build_pointer_type (fenv_type
);
21995 tree fenv_addr
= build1 (ADDR_EXPR
, fenv_ptr
, fenv_var
);
21996 fenv_addr
= fold_convert (ptr_type_node
, fenv_addr
);
21997 tree fnstenv
= get_ix86_builtin (IX86_BUILTIN_FNSTENV
);
21998 tree fldenv
= get_ix86_builtin (IX86_BUILTIN_FLDENV
);
21999 tree fnstsw
= get_ix86_builtin (IX86_BUILTIN_FNSTSW
);
22000 tree fnclex
= get_ix86_builtin (IX86_BUILTIN_FNCLEX
);
22001 tree hold_fnstenv
= build_call_expr (fnstenv
, 1, fenv_addr
);
22002 tree hold_fnclex
= build_call_expr (fnclex
, 0);
22003 fenv_var
= build4 (TARGET_EXPR
, fenv_type
, fenv_var
, hold_fnstenv
,
22004 NULL_TREE
, NULL_TREE
);
22005 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, fenv_var
,
22007 *clear
= build_call_expr (fnclex
, 0);
22008 tree sw_var
= create_tmp_var_raw (short_unsigned_type_node
);
22009 tree fnstsw_call
= build_call_expr (fnstsw
, 0);
22010 tree sw_mod
= build2 (MODIFY_EXPR
, short_unsigned_type_node
,
22011 sw_var
, fnstsw_call
);
22012 tree exceptions_x87
= fold_convert (integer_type_node
, sw_var
);
22013 tree update_mod
= build2 (MODIFY_EXPR
, integer_type_node
,
22014 exceptions_var
, exceptions_x87
);
22015 *update
= build2 (COMPOUND_EXPR
, integer_type_node
,
22016 sw_mod
, update_mod
);
22017 tree update_fldenv
= build_call_expr (fldenv
, 1, fenv_addr
);
22018 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
, update_fldenv
);
22020 if (TARGET_SSE
&& TARGET_SSE_MATH
)
22022 tree mxcsr_orig_var
= create_tmp_var_raw (unsigned_type_node
);
22023 tree mxcsr_mod_var
= create_tmp_var_raw (unsigned_type_node
);
22024 tree stmxcsr
= get_ix86_builtin (IX86_BUILTIN_STMXCSR
);
22025 tree ldmxcsr
= get_ix86_builtin (IX86_BUILTIN_LDMXCSR
);
22026 tree stmxcsr_hold_call
= build_call_expr (stmxcsr
, 0);
22027 tree hold_assign_orig
= build2 (MODIFY_EXPR
, unsigned_type_node
,
22028 mxcsr_orig_var
, stmxcsr_hold_call
);
22029 tree hold_mod_val
= build2 (BIT_IOR_EXPR
, unsigned_type_node
,
22031 build_int_cst (unsigned_type_node
, 0x1f80));
22032 hold_mod_val
= build2 (BIT_AND_EXPR
, unsigned_type_node
, hold_mod_val
,
22033 build_int_cst (unsigned_type_node
, 0xffffffc0));
22034 tree hold_assign_mod
= build2 (MODIFY_EXPR
, unsigned_type_node
,
22035 mxcsr_mod_var
, hold_mod_val
);
22036 tree ldmxcsr_hold_call
= build_call_expr (ldmxcsr
, 1, mxcsr_mod_var
);
22037 tree hold_all
= build2 (COMPOUND_EXPR
, unsigned_type_node
,
22038 hold_assign_orig
, hold_assign_mod
);
22039 hold_all
= build2 (COMPOUND_EXPR
, void_type_node
, hold_all
,
22040 ldmxcsr_hold_call
);
22042 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, *hold
, hold_all
);
22045 tree ldmxcsr_clear_call
= build_call_expr (ldmxcsr
, 1, mxcsr_mod_var
);
22047 *clear
= build2 (COMPOUND_EXPR
, void_type_node
, *clear
,
22048 ldmxcsr_clear_call
);
22050 *clear
= ldmxcsr_clear_call
;
22051 tree stxmcsr_update_call
= build_call_expr (stmxcsr
, 0);
22052 tree exceptions_sse
= fold_convert (integer_type_node
,
22053 stxmcsr_update_call
);
22056 tree exceptions_mod
= build2 (BIT_IOR_EXPR
, integer_type_node
,
22057 exceptions_var
, exceptions_sse
);
22058 tree exceptions_assign
= build2 (MODIFY_EXPR
, integer_type_node
,
22059 exceptions_var
, exceptions_mod
);
22060 *update
= build2 (COMPOUND_EXPR
, integer_type_node
, *update
,
22061 exceptions_assign
);
22064 *update
= build2 (MODIFY_EXPR
, integer_type_node
,
22065 exceptions_var
, exceptions_sse
);
22066 tree ldmxcsr_update_call
= build_call_expr (ldmxcsr
, 1, mxcsr_orig_var
);
22067 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
,
22068 ldmxcsr_update_call
);
22070 tree atomic_feraiseexcept
22071 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT
);
22072 tree atomic_feraiseexcept_call
= build_call_expr (atomic_feraiseexcept
,
22073 1, exceptions_var
);
22074 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
,
22075 atomic_feraiseexcept_call
);
22078 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
22079 /* For i386, common symbol is local only for non-PIE binaries. For
22080 x86-64, common symbol is local only for non-PIE binaries or linker
22081 supports copy reloc in PIE binaries. */
22084 ix86_binds_local_p (const_tree exp
)
22086 return default_binds_local_p_3 (exp
, flag_shlib
!= 0, true, true,
22089 && HAVE_LD_PIE_COPYRELOC
!= 0)));
22093 /* If MEM is in the form of [base+offset], extract the two parts
22094 of address and set to BASE and OFFSET, otherwise return false. */
22097 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
22101 gcc_assert (MEM_P (mem
));
22103 addr
= XEXP (mem
, 0);
22105 if (GET_CODE (addr
) == CONST
)
22106 addr
= XEXP (addr
, 0);
22108 if (REG_P (addr
) || GET_CODE (addr
) == SYMBOL_REF
)
22111 *offset
= const0_rtx
;
22115 if (GET_CODE (addr
) == PLUS
22116 && (REG_P (XEXP (addr
, 0))
22117 || GET_CODE (XEXP (addr
, 0)) == SYMBOL_REF
)
22118 && CONST_INT_P (XEXP (addr
, 1)))
22120 *base
= XEXP (addr
, 0);
22121 *offset
= XEXP (addr
, 1);
22128 /* Given OPERANDS of consecutive load/store, check if we can merge
22129 them into move multiple. LOAD is true if they are load instructions.
22130 MODE is the mode of memory operands. */
22133 ix86_operands_ok_for_move_multiple (rtx
*operands
, bool load
,
22136 HOST_WIDE_INT offval_1
, offval_2
, msize
;
22137 rtx mem_1
, mem_2
, reg_1
, reg_2
, base_1
, base_2
, offset_1
, offset_2
;
22141 mem_1
= operands
[1];
22142 mem_2
= operands
[3];
22143 reg_1
= operands
[0];
22144 reg_2
= operands
[2];
22148 mem_1
= operands
[0];
22149 mem_2
= operands
[2];
22150 reg_1
= operands
[1];
22151 reg_2
= operands
[3];
22154 gcc_assert (REG_P (reg_1
) && REG_P (reg_2
));
22156 if (REGNO (reg_1
) != REGNO (reg_2
))
22159 /* Check if the addresses are in the form of [base+offset]. */
22160 if (!extract_base_offset_in_addr (mem_1
, &base_1
, &offset_1
))
22162 if (!extract_base_offset_in_addr (mem_2
, &base_2
, &offset_2
))
22165 /* Check if the bases are the same. */
22166 if (!rtx_equal_p (base_1
, base_2
))
22169 offval_1
= INTVAL (offset_1
);
22170 offval_2
= INTVAL (offset_2
);
22171 msize
= GET_MODE_SIZE (mode
);
22172 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
22173 if (offval_1
+ msize
!= offval_2
)
22179 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
22182 ix86_optab_supported_p (int op
, machine_mode mode1
, machine_mode
,
22183 optimization_type opt_type
)
22197 return opt_type
== OPTIMIZE_FOR_SPEED
;
22200 if (SSE_FLOAT_MODE_P (mode1
)
22202 && !flag_trapping_math
22204 return opt_type
== OPTIMIZE_FOR_SPEED
;
22210 if (SSE_FLOAT_MODE_P (mode1
)
22212 && !flag_trapping_math
22215 return opt_type
== OPTIMIZE_FOR_SPEED
;
22218 return opt_type
== OPTIMIZE_FOR_SPEED
&& use_rsqrt_p ();
22225 /* Address space support.
22227 This is not "far pointers" in the 16-bit sense, but an easy way
22228 to use %fs and %gs segment prefixes. Therefore:
22230 (a) All address spaces have the same modes,
22231 (b) All address spaces have the same addresss forms,
22232 (c) While %fs and %gs are technically subsets of the generic
22233 address space, they are probably not subsets of each other.
22234 (d) Since we have no access to the segment base register values
22235 without resorting to a system call, we cannot convert a
22236 non-default address space to a default address space.
22237 Therefore we do not claim %fs or %gs are subsets of generic.
22239 Therefore we can (mostly) use the default hooks. */
22241 /* All use of segmentation is assumed to make address 0 valid. */
22244 ix86_addr_space_zero_address_valid (addr_space_t as
)
22246 return as
!= ADDR_SPACE_GENERIC
;
22250 ix86_init_libfuncs (void)
22254 set_optab_libfunc (sdivmod_optab
, TImode
, "__divmodti4");
22255 set_optab_libfunc (udivmod_optab
, TImode
, "__udivmodti4");
22259 set_optab_libfunc (sdivmod_optab
, DImode
, "__divmoddi4");
22260 set_optab_libfunc (udivmod_optab
, DImode
, "__udivmoddi4");
22264 darwin_rename_builtins ();
22268 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
22269 FPU, assume that the fpcw is set to extended precision; when using
22270 only SSE, rounding is correct; when using both SSE and the FPU,
22271 the rounding precision is indeterminate, since either may be chosen
22272 apparently at random. */
22274 static enum flt_eval_method
22275 ix86_excess_precision (enum excess_precision_type type
)
22279 case EXCESS_PRECISION_TYPE_FAST
:
22280 /* The fastest type to promote to will always be the native type,
22281 whether that occurs with implicit excess precision or
22283 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
;
22284 case EXCESS_PRECISION_TYPE_STANDARD
:
22285 case EXCESS_PRECISION_TYPE_IMPLICIT
:
22286 /* Otherwise, the excess precision we want when we are
22287 in a standards compliant mode, and the implicit precision we
22288 provide would be identical were it not for the unpredictable
22291 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
;
22292 else if (!TARGET_MIX_SSE_I387
)
22294 if (!(TARGET_SSE
&& TARGET_SSE_MATH
))
22295 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE
;
22296 else if (TARGET_SSE2
)
22297 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
;
22300 /* If we are in standards compliant mode, but we know we will
22301 calculate in unpredictable precision, return
22302 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
22303 excess precision if the target can't guarantee it will honor
22305 return (type
== EXCESS_PRECISION_TYPE_STANDARD
22306 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
22307 : FLT_EVAL_METHOD_UNPREDICTABLE
);
22309 gcc_unreachable ();
22312 return FLT_EVAL_METHOD_UNPREDICTABLE
;
22315 /* Implement PUSH_ROUNDING. On 386, we have pushw instruction that
22316 decrements by exactly 2 no matter what the position was, there is no pushb.
22318 But as CIE data alignment factor on this arch is -4 for 32bit targets
22319 and -8 for 64bit targets, we need to make sure all stack pointer adjustments
22320 are in multiple of 4 for 32bit targets and 8 for 64bit targets. */
22323 ix86_push_rounding (poly_int64 bytes
)
22325 return ROUND_UP (bytes
, UNITS_PER_WORD
);
/* Target-specific selftests.  */

#if CHECKING_P

namespace selftest {

/* Verify that hard regs are dumped as expected (in compact mode).  */

static void
ix86_test_dumping_hard_regs ()
{
  ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
  ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
}

/* Test dumping an insn with repeated references to the same SCRATCH,
   to verify the rtx_reuse code.  */

static void
ix86_test_dumping_memory_blockage ()
{
  set_new_first_and_last_insn (NULL, NULL);

  rtx pat = gen_memory_blockage ();
  rtx_reuse_manager r;
  r.preprocess (pat);

  /* Verify that the repeated references to the SCRATCH show use
     reuse IDS.  The first should be prefixed with a reuse ID,
     and the second should be dumped as a "reuse_rtx" of that ID.
     The expected string assumes Pmode == DImode.  */
  if (Pmode == DImode)
    ASSERT_RTL_DUMP_EQ_WITH_REUSE
      ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0  A8])\n"
       "        (unspec:BLK [\n"
       "                (mem/v:BLK (reuse_rtx 0) [0  A8])\n"
       "            ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
}

/* Verify loading an RTL dump; specifically a dump of copying
   a param on x86_64 from a hard reg into the frame.
   This test is target-specific since the dump contains target-specific
   hard reg names.  */

static void
ix86_test_loading_dump_fragment_1 ()
{
  rtl_dump_test t (SELFTEST_LOCATION,
		   locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));

  rtx_insn *insn = get_insn_by_uid (1);
  ASSERT_EQ (INSN, GET_CODE (insn));

  /* The block structure and indentation here is purely for
     readability; it mirrors the structure of the rtx.  */
  tree mem_expr;
  {
    rtx pat = PATTERN (insn);
    ASSERT_EQ (SET, GET_CODE (pat));
    {
      rtx dest = SET_DEST (pat);
      ASSERT_EQ (MEM, GET_CODE (dest));
      /* Verify the "/c" was parsed.  */
      ASSERT_TRUE (RTX_FLAG (dest, call));
      ASSERT_EQ (SImode, GET_MODE (dest));
      {
	rtx addr = XEXP (dest, 0);
	ASSERT_EQ (PLUS, GET_CODE (addr));
	ASSERT_EQ (DImode, GET_MODE (addr));
	{
	  rtx lhs = XEXP (addr, 0);
	  /* Verify that the "frame" REG was consolidated.  */
	  ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
	}
	{
	  rtx rhs = XEXP (addr, 1);
	  ASSERT_EQ (CONST_INT, GET_CODE (rhs));
	  ASSERT_EQ (-4, INTVAL (rhs));
	}
      }
      /* Verify the "[1 i+0 S4 A32]" was parsed.  */
      ASSERT_EQ (1, MEM_ALIAS_SET (dest));
      /* "i" should have been handled by synthesizing a global int
	 variable named "i".  */
      mem_expr = MEM_EXPR (dest);
      ASSERT_NE (mem_expr, NULL);
      ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
      ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
      ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
      ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
      /* "+0".  */
      ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
      ASSERT_EQ (0, MEM_OFFSET (dest));
      /* "S4".  */
      ASSERT_EQ (4, MEM_SIZE (dest));
      /* "A32".  */
      ASSERT_EQ (32, MEM_ALIGN (dest));
    }
    {
      rtx src = SET_SRC (pat);
      ASSERT_EQ (REG, GET_CODE (src));
      ASSERT_EQ (SImode, GET_MODE (src));
      ASSERT_EQ (5, REGNO (src));
      tree reg_expr = REG_EXPR (src);
      /* "i" here should point to the same var as for the MEM_EXPR.  */
      ASSERT_EQ (reg_expr, mem_expr);
    }
  }
}

/* Verify that the RTL loader copes with a call_insn dump.
   This test is target-specific since the dump contains a target-specific
   hard reg name.  */

static void
ix86_test_loading_call_insn ()
{
  /* The test dump includes register "xmm0", where requires TARGET_SSE
     to exist.  */
  if (!TARGET_SSE)
    return;

  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));

  rtx_insn *insn = get_insns ();
  ASSERT_EQ (CALL_INSN, GET_CODE (insn));

  /* "/j".  */
  ASSERT_TRUE (RTX_FLAG (insn, jump));

  rtx pat = PATTERN (insn);
  ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));

  /* Verify REG_NOTES.  */
  {
    /* "(expr_list:REG_CALL_DECL".  */
    ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
    rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
    ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));

    /* "(expr_list:REG_EH_REGION (const_int 0 [0])".  */
    rtx_expr_list *note1 = note0->next ();
    ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));

    ASSERT_EQ (NULL, note1->next ());
  }

  /* Verify CALL_INSN_FUNCTION_USAGE.  */
  {
    /* "(expr_list:DF (use (reg:DF 21 xmm0))".  */
    rtx_expr_list *usage
      = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
    ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
    ASSERT_EQ (DFmode, GET_MODE (usage));
    ASSERT_EQ (USE, GET_CODE (usage->element ()));
    ASSERT_EQ (NULL, usage->next ());
  }
}

/* Verify that the RTL loader copes a dump from print_rtx_function.
   This test is target-specific since the dump contains target-specific
   hard reg names.  */

static void
ix86_test_loading_full_dump ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));

  ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  rtx_insn *insn_1 = get_insn_by_uid (1);
  ASSERT_EQ (NOTE, GET_CODE (insn_1));

  rtx_insn *insn_7 = get_insn_by_uid (7);
  ASSERT_EQ (INSN, GET_CODE (insn_7));
  ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));

  rtx_insn *insn_15 = get_insn_by_uid (15);
  ASSERT_EQ (INSN, GET_CODE (insn_15));
  ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));

  /* Verify crtl->return_rtx.  */
  ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
  ASSERT_EQ (0, REGNO (crtl->return_rtx));
  ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
}

/* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
   In particular, verify that it correctly loads the 2nd operand.
   This test is target-specific since these are machine-specific
   operands (and enums).  */

static void
ix86_test_loading_unspec ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));

  ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  ASSERT_TRUE (cfun);

  /* Test of an UNSPEC.  */
  rtx_insn *insn = get_insns ();
  ASSERT_EQ (INSN, GET_CODE (insn));
  rtx set = single_set (insn);
  ASSERT_NE (NULL, set);
  rtx dst = SET_DEST (set);
  ASSERT_EQ (MEM, GET_CODE (dst));
  rtx src = SET_SRC (set);
  ASSERT_EQ (UNSPEC, GET_CODE (src));
  ASSERT_EQ (BLKmode, GET_MODE (src));
  ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));

  rtx v0 = XVECEXP (src, 0, 0);

  /* Verify that the two uses of the first SCRATCH have pointer
     equality.  */
  rtx scratch_a = XEXP (dst, 0);
  ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));

  rtx scratch_b = XEXP (v0, 0);
  ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));

  ASSERT_EQ (scratch_a, scratch_b);

  /* Verify that the two mems are thus treated as equal.  */
  ASSERT_TRUE (rtx_equal_p (dst, v0));

  /* Verify the the insn is recognized.  */
  ASSERT_NE(-1, recog_memoized (insn));

  /* Test of an UNSPEC_VOLATILE, which has its own enum values.  */
  insn = NEXT_INSN (insn);
  ASSERT_EQ (INSN, GET_CODE (insn));

  set = single_set (insn);
  ASSERT_NE (NULL, set);

  src = SET_SRC (set);
  ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
  ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
}

/* Run all target-specific selftests.  */

static void
ix86_run_selftests (void)
{
  ix86_test_dumping_hard_regs ();
  ix86_test_dumping_memory_blockage ();

  /* Various tests of loading RTL dumps, here because they contain
     ix86-isms (e.g. names of hard regs).  */
  ix86_test_loading_dump_fragment_1 ();
  ix86_test_loading_call_insn ();
  ix86_test_loading_full_dump ();
  ix86_test_loading_unspec ();
}

} // namespace selftest

#endif /* CHECKING_P */
22590 /* Initialize the GCC target structure. */
22591 #undef TARGET_RETURN_IN_MEMORY
22592 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
22594 #undef TARGET_LEGITIMIZE_ADDRESS
22595 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
22597 #undef TARGET_ATTRIBUTE_TABLE
22598 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
22599 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
22600 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
22601 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22602 # undef TARGET_MERGE_DECL_ATTRIBUTES
22603 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
22606 #undef TARGET_COMP_TYPE_ATTRIBUTES
22607 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
22609 #undef TARGET_INIT_BUILTINS
22610 #define TARGET_INIT_BUILTINS ix86_init_builtins
22611 #undef TARGET_BUILTIN_DECL
22612 #define TARGET_BUILTIN_DECL ix86_builtin_decl
22613 #undef TARGET_EXPAND_BUILTIN
22614 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
22616 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
22617 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
22618 ix86_builtin_vectorized_function
22620 #undef TARGET_VECTORIZE_BUILTIN_GATHER
22621 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
22623 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
22624 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
22626 #undef TARGET_BUILTIN_RECIPROCAL
22627 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
22629 #undef TARGET_ASM_FUNCTION_EPILOGUE
22630 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
22632 #undef TARGET_ENCODE_SECTION_INFO
22633 #ifndef SUBTARGET_ENCODE_SECTION_INFO
22634 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
22636 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
22639 #undef TARGET_ASM_OPEN_PAREN
22640 #define TARGET_ASM_OPEN_PAREN ""
22641 #undef TARGET_ASM_CLOSE_PAREN
22642 #define TARGET_ASM_CLOSE_PAREN ""
22644 #undef TARGET_ASM_BYTE_OP
22645 #define TARGET_ASM_BYTE_OP ASM_BYTE
22647 #undef TARGET_ASM_ALIGNED_HI_OP
22648 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
22649 #undef TARGET_ASM_ALIGNED_SI_OP
22650 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
22652 #undef TARGET_ASM_ALIGNED_DI_OP
22653 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
22656 #undef TARGET_PROFILE_BEFORE_PROLOGUE
22657 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
22659 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
22660 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
22662 #undef TARGET_ASM_UNALIGNED_HI_OP
22663 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
22664 #undef TARGET_ASM_UNALIGNED_SI_OP
22665 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
22666 #undef TARGET_ASM_UNALIGNED_DI_OP
22667 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
22669 #undef TARGET_PRINT_OPERAND
22670 #define TARGET_PRINT_OPERAND ix86_print_operand
22671 #undef TARGET_PRINT_OPERAND_ADDRESS
22672 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
22673 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
22674 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
22675 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
22676 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
22678 #undef TARGET_SCHED_INIT_GLOBAL
22679 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
22680 #undef TARGET_SCHED_ADJUST_COST
22681 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
22682 #undef TARGET_SCHED_ISSUE_RATE
22683 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
22684 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
22685 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
22686 ia32_multipass_dfa_lookahead
22687 #undef TARGET_SCHED_MACRO_FUSION_P
22688 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
22689 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
22690 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
22692 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
22693 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
22695 #undef TARGET_MEMMODEL_CHECK
22696 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
22698 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
22699 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
22702 #undef TARGET_HAVE_TLS
22703 #define TARGET_HAVE_TLS true
22705 #undef TARGET_CANNOT_FORCE_CONST_MEM
22706 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
22707 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
22708 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
22710 #undef TARGET_DELEGITIMIZE_ADDRESS
22711 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
22713 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
22714 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
22716 #undef TARGET_MS_BITFIELD_LAYOUT_P
22717 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
22720 #undef TARGET_BINDS_LOCAL_P
22721 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
22723 #undef TARGET_BINDS_LOCAL_P
22724 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
22726 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22727 #undef TARGET_BINDS_LOCAL_P
22728 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
/* Thunk emission.  */
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

/* RTX cost model.  */
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

/* Condition-code register handling.  */
#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM FLAGS_REG
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

/* Built-in folding.  */
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin

/* Function multi-versioning (target_clones / target attribute).  */
#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

/* va_list handling.  */
#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION ix86_excess_precision
/* Argument passing and calling-convention hooks.  */
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN ix86_warn_func_return

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

/* Mode support queries.  */
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
/* DTP-relative TLS debug info can only be emitted when the assembler
   supports TLS relocations.  */
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

/* Subtargets (e.g. cygwin/mingw) may provide their own attribute
   insertion hook.  */
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard

/* Darwin keeps the default stack-protector failure handler.  */
#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif
/* Function return values.  */
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

/* Register allocation / reload.  */
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

/* Auto-vectorizer hooks and its cost model.  */
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_SPLIT_REDUCTION
#define TARGET_VECTORIZE_SPLIT_REDUCTION \
  ix86_split_reduction
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

/* Per-function target-option attribute support.  */
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

/* Prologue / epilogue and frame layout.  */
#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

/* OpenMP / Cilk simd-clone support.  */
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

/* Mode-switching (used e.g. for AVX vzeroupper insertion).  */
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS ix86_init_libfuncs

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc

/* If-conversion cost limits.  */
#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost

#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p

/* Hard-register / machine-mode queries.  */
#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  ix86_hard_regno_call_part_clobbered

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class

#undef TARGET_STATIC_RTX_ALIGNMENT
#define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment

#undef TARGET_EMPTY_RECORD_P
#define TARGET_EMPTY_RECORD_P ix86_is_empty_record

#undef TARGET_WARN_PARAMETER_PASSING_ABI
#define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi

#undef TARGET_GET_MULTILIB_ABI_NAME
#define TARGET_GET_MULTILIB_ABI_NAME \
  ix86_get_multilib_abi_name
23103 static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED
)
23105 #ifdef OPTION_GLIBC
23107 return (built_in_function
)fcode
== BUILT_IN_MEMPCPY
;
#undef TARGET_LIBC_HAS_FAST_FUNCTION
#define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function

/* Self-tests exist only in checking-enabled builds; the guard below
   matches the trailing comment on its #endif.  */
#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
#endif /* #if CHECKING_P */
23123 struct gcc_target targetm
= TARGET_INITIALIZER
;
23125 #include "gt-i386.h"