1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2023 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #define IN_TARGET_CODE 1
24 #include "coretypes.h"
34 #include "stringpool.h"
41 #include "diagnostic.h"
44 #include "fold-const.h"
47 #include "stor-layout.h"
50 #include "insn-attr.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
61 #include "tm-constrs.h"
63 #include "sched-int.h"
65 #include "tree-pass.h"
67 #include "pass_manager.h"
68 #include "target-globals.h"
69 #include "gimple-iterator.h"
70 #include "gimple-fold.h"
71 #include "tree-vectorizer.h"
72 #include "shrink-wrap.h"
75 #include "tree-iterator.h"
77 #include "case-cfn-macros.h"
79 #include "fold-const-call.h"
81 #include "tree-ssanames.h"
83 #include "selftest-rtl.h"
84 #include "print-rtl.h"
87 #include "symbol-summary.h"
89 #include "ipa-fnsummary.h"
90 #include "wide-int-bitmask.h"
91 #include "tree-vector-builder.h"
93 #include "dwarf2out.h"
94 #include "i386-options.h"
95 #include "i386-builtins.h"
96 #include "i386-expand.h"
97 #include "i386-features.h"
98 #include "function-abi.h"
99 #include "rtl-error.h"
101 /* This file should be included last. */
102 #include "target-def.h"
/* Forward declarations of static helpers defined later in this file
   (PE/COFF symbol legitimization, operand-address printing, and
   epilogue pop-based register restore).  */
104 static rtx
legitimize_dllimport_symbol (rtx
, bool);
105 static rtx
legitimize_pe_coff_extern_decl (rtx
, bool);
106 static void ix86_print_operand_address_as (FILE *, rtx
, addr_space_t
, bool);
107 static void ix86_emit_restore_reg_using_pop (rtx
);
/* Default stack-limit value; -1 disables the stack-limit check.
   NOTE(review): the matching #endif is not visible in this excerpt.  */
110 #ifndef CHECK_STACK_LIMIT
111 #define CHECK_STACK_LIMIT (-1)
/* Return index of given mode in mult and division cost tables:
   QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, anything
   else (e.g. TImode) -> 4.  The visible macro body ended without a
   default arm, leaving the conditional expression unterminated;
   restore the final ": 4" so the macro is well-formed.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Cost table used for instruction scheduling/tuning decisions;
   initialized later from the selected processor.  */
124 const struct processor_costs
*ix86_tune_cost
= NULL
;
126 /* Set by -mtune or -Os. */
127 const struct processor_costs
*ix86_cost
= NULL
;
129 /* In case the average insn count for single function invocation is
130 lower than this constant, emit fast (but longer) prologue and
epilogue code.  */
132 #define FAST_PROLOGUE_INSN_COUNT 20
134 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
135 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
136 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
137 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
139 /* Array of the smallest class containing reg number REGNO, indexed by
140 REGNO. Used by REGNO_REG_CLASS in i386.h. */
142 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
145 AREG
, DREG
, CREG
, BREG
,
147 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
149 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
150 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
151 /* arg pointer, flags, fpsr, frame */
152 NON_Q_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
154 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
155 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
157 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
158 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
160 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
161 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
162 /* SSE REX registers */
163 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
164 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
165 /* AVX-512 SSE registers */
166 ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
,
167 ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
,
168 ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
,
169 ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
,
170 /* Mask registers. */
171 ALL_MASK_REGS
, MASK_REGS
, MASK_REGS
, MASK_REGS
,
172 MASK_REGS
, MASK_REGS
, MASK_REGS
, MASK_REGS
175 /* The "default" register map used in 32bit mode. */
177 int const debugger_register_map
[FIRST_PSEUDO_REGISTER
] =
180 0, 2, 1, 3, 6, 7, 4, 5,
182 12, 13, 14, 15, 16, 17, 18, 19,
183 /* arg, flags, fpsr, frame */
184 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
185 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
187 21, 22, 23, 24, 25, 26, 27, 28,
189 29, 30, 31, 32, 33, 34, 35, 36,
190 /* extended integer registers */
191 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
192 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
193 /* extended sse registers */
194 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
195 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
196 /* AVX-512 registers 16-23 */
197 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
198 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
199 /* AVX-512 registers 24-31 */
200 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
201 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
203 93, 94, 95, 96, 97, 98, 99, 100
206 /* The "default" register map used in 64bit mode. */
208 int const debugger64_register_map
[FIRST_PSEUDO_REGISTER
] =
211 0, 1, 2, 3, 4, 5, 6, 7,
213 33, 34, 35, 36, 37, 38, 39, 40,
214 /* arg, flags, fpsr, frame */
215 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
216 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
218 17, 18, 19, 20, 21, 22, 23, 24,
220 41, 42, 43, 44, 45, 46, 47, 48,
221 /* extended integer registers */
222 8, 9, 10, 11, 12, 13, 14, 15,
223 /* extended SSE registers */
224 25, 26, 27, 28, 29, 30, 31, 32,
225 /* AVX-512 registers 16-23 */
226 67, 68, 69, 70, 71, 72, 73, 74,
227 /* AVX-512 registers 24-31 */
228 75, 76, 77, 78, 79, 80, 81, 82,
230 118, 119, 120, 121, 122, 123, 124, 125
233 /* Define the register numbers to be used in Dwarf debugging information.
234 The SVR4 reference port C compiler uses the following register numbers
235 in its Dwarf output code:
236 0 for %eax (gcc regno = 0)
237 1 for %ecx (gcc regno = 2)
238 2 for %edx (gcc regno = 1)
239 3 for %ebx (gcc regno = 3)
240 4 for %esp (gcc regno = 7)
241 5 for %ebp (gcc regno = 6)
242 6 for %esi (gcc regno = 4)
243 7 for %edi (gcc regno = 5)
244 The following three DWARF register numbers are never generated by
245 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
246 believed these numbers have these meanings.
247 8 for %eip (no gcc equivalent)
248 9 for %eflags (gcc regno = 17)
249 10 for %trapno (no gcc equivalent)
250 It is not at all clear how we should number the FP stack registers
251 for the x86 architecture. If the version of SDB on x86/svr4 were
252 a bit less brain dead with respect to floating-point then we would
253 have a precedent to follow with respect to DWARF register numbers
254 for x86 FP registers, but the SDB on x86/svr4 was so completely
255 broken with respect to FP registers that it is hardly worth thinking
256 of it as something to strive for compatibility with.
257 The version of x86/svr4 SDB I had does (partially)
258 seem to believe that DWARF register number 11 is associated with
259 the x86 register %st(0), but that's about all. Higher DWARF
260 register numbers don't seem to be associated with anything in
261 particular, and even for DWARF regno 11, SDB only seemed to under-
262 stand that it should say that a variable lives in %st(0) (when
263 asked via an `=' command) if we said it was in DWARF regno 11,
264 but SDB still printed garbage when asked for the value of the
265 variable in question (via a `/' command).
266 (Also note that the labels SDB printed for various FP stack regs
267 when doing an `x' command were all wrong.)
268 Note that these problems generally don't affect the native SVR4
269 C compiler because it doesn't allow the use of -O with -g and
270 because when it is *not* optimizing, it allocates a memory
271 location for each floating-point variable, and the memory
272 location is what gets described in the DWARF AT_location
273 attribute for the variable in question.
274 Regardless of the severe mental illness of the x86/svr4 SDB, we
275 do something sensible here and we use the following DWARF
276 register numbers. Note that these are all stack-top-relative
278 11 for %st(0) (gcc regno = 8)
279 12 for %st(1) (gcc regno = 9)
280 13 for %st(2) (gcc regno = 10)
281 14 for %st(3) (gcc regno = 11)
282 15 for %st(4) (gcc regno = 12)
283 16 for %st(5) (gcc regno = 13)
284 17 for %st(6) (gcc regno = 14)
285 18 for %st(7) (gcc regno = 15)
287 int const svr4_debugger_register_map
[FIRST_PSEUDO_REGISTER
] =
290 0, 2, 1, 3, 6, 7, 5, 4,
292 11, 12, 13, 14, 15, 16, 17, 18,
293 /* arg, flags, fpsr, frame */
294 IGNORED_DWARF_REGNUM
, 9,
295 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
297 21, 22, 23, 24, 25, 26, 27, 28,
299 29, 30, 31, 32, 33, 34, 35, 36,
300 /* extended integer registers */
301 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
302 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
303 /* extended sse registers */
304 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
305 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
306 /* AVX-512 registers 16-23 */
307 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
308 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
309 /* AVX-512 registers 24-31 */
310 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
311 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
313 93, 94, 95, 96, 97, 98, 99, 100
316 /* Define parameter passing and return registers. */
318 static int const x86_64_int_parameter_registers
[6] =
320 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
323 static int const x86_64_ms_abi_int_parameter_registers
[4] =
325 CX_REG
, DX_REG
, R8_REG
, R9_REG
328 static int const x86_64_int_return_registers
[4] =
330 AX_REG
, DX_REG
, DI_REG
, SI_REG
333 /* Define the structure for the machine field in struct function. */
/* NOTE(review): this struct and the x86_64_reg_class enum below are
   truncated in this excerpt; only the members shown here are visible.  */
335 struct GTY(()) stack_local_entry
{
339 struct stack_local_entry
*next
;
342 /* Which cpu are we scheduling for. */
343 enum attr_cpu ix86_schedule
;
345 /* Which cpu are we optimizing for. */
346 enum processor_type ix86_tune
;
348 /* Which instruction set architecture to use. */
349 enum processor_type ix86_arch
;
351 /* True if processor has SSE prefetch instruction. */
352 unsigned char ix86_prefetch_sse
;
354 /* Preferred alignment for stack boundary in bits. */
355 unsigned int ix86_preferred_stack_boundary
;
357 /* Alignment for incoming stack boundary in bits specified at
the command line.  */
359 unsigned int ix86_user_incoming_stack_boundary
;
361 /* Default alignment for incoming stack boundary in bits. */
362 unsigned int ix86_default_incoming_stack_boundary
;
364 /* Alignment for incoming stack boundary in bits. */
365 unsigned int ix86_incoming_stack_boundary
;
367 /* True if there is no direct access to extern symbols. */
368 bool ix86_has_no_direct_extern_access
;
370 /* Calling abi specific va_list type nodes. */
371 tree sysv_va_list_type_node
;
372 tree ms_va_list_type_node
;
374 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
375 char internal_label_prefix
[16];
376 int internal_label_prefix_len
;
378 /* Fence to use after loop using movnt. */
381 /* Register class used for passing given 64bit part of the argument.
382 These represent classes as documented by the PS ABI, with the exception
383 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
384 use SF or DFmode move instead of DImode to avoid reformatting penalties.
386 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
387 whenever possible (upper half does contain padding). */
388 enum x86_64_reg_class
391 X86_64_INTEGER_CLASS
,
392 X86_64_INTEGERSI_CLASS
,
400 X86_64_COMPLEX_X87_CLASS
,
/* Upper bound on the number of eightbyte classes one argument can
   occupy.  */
404 #define MAX_CLASSES 8
406 /* Table of constants used by fldpi, fldln2, etc.... */
407 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
/* Set once ext_80387_constants_table has been initialized.  */
408 static bool ext_80387_constants_init
;
/* Forward declarations for static functions referenced before their
   definitions.  */
411 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
412 static bool ix86_function_value_regno_p (const unsigned int);
413 static unsigned int ix86_function_arg_boundary (machine_mode
,
415 static rtx
ix86_static_chain (const_tree
, bool);
416 static int ix86_function_regparm (const_tree
, const_tree
);
417 static void ix86_compute_frame_layout (void);
418 static tree
ix86_canonical_va_list_type (tree
);
419 static unsigned int split_stack_prologue_scratch_regno (void);
420 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
422 static bool ix86_can_inline_p (tree
, tree
);
423 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
426 /* Whether -mtune= or -march= were specified */
427 int ix86_tune_defaulted
;
428 int ix86_arch_specified
;
430 /* Return true if a red-zone is in use. We can't use red-zone when
431 there are local indirect jumps, like "indirect_jump" or "tablejump",
432 which jumps to another place in the function, since "call" in the
433 indirect thunk pushes the return address onto stack, destroying
the red-zone.
436 TODO: If we can reserve the first 2 WORDs, for PUSH and, another
437 for CALL, in red-zone, we can allow local indirect jumps with
an indirect thunk.  */
/* The red zone is usable only when TARGET_RED_ZONE is set, the MS
   64-bit ABI is not in effect, and no local indirect jump would be
   converted into an indirect thunk (indirect_branch_keep means the
   branch is left unconverted).  */
441 ix86_using_red_zone (void)
443 return (TARGET_RED_ZONE
444 && !TARGET_64BIT_MS_ABI
445 && (!cfun
->machine
->has_local_indirect_jump
446 || cfun
->machine
->indirect_branch_type
== indirect_branch_keep
));
449 /* Return true, if profiling code should be emitted before
450 prologue. Otherwise it returns false.
451 Note: For x86 with "hotfix" it is sorried. */
/* flag_fentry presumably reflects -mfentry (profiler call emitted
   before the prologue) -- TODO confirm against options definition.  */
453 ix86_profile_before_prologue (void)
455 return flag_fentry
!= 0;
458 /* Update register usage after having seen the compiler flags. */
/* NOTE(review): several guard conditions (e.g. the 32-bit-only test
   around the REX-register loops and the feature tests before the
   MMX/SSE clears) are elided in this excerpt; only the bodies are
   visible.  */
465 /* If there are no caller-saved registers, preserve all registers.
466 except fixed_regs and registers used for function return value
467 since aggregate_value_p checks call_used_regs[regno] on return
value.  */
469 if (cfun
&& cfun
->machine
->no_caller_saved_registers
)
470 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
471 if (!fixed_regs
[i
] && !ix86_function_value_regno_p (i
))
472 call_used_regs
[i
] = 0;
474 /* For 32-bit targets, disable the REX registers. */
477 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
478 CLEAR_HARD_REG_BIT (accessible_reg_set
, i
);
479 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
480 CLEAR_HARD_REG_BIT (accessible_reg_set
, i
);
481 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
482 CLEAR_HARD_REG_BIT (accessible_reg_set
, i
);
485 /* See the definition of CALL_USED_REGISTERS in i386.h. */
486 c_mask
= CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI
);
/* CLOBBERED_REGS is rebuilt from scratch below.  */
488 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
490 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
492 /* Set/reset conditionally defined registers from
493 CALL_USED_REGISTERS initializer. */
494 if (call_used_regs
[i
] > 1)
495 call_used_regs
[i
] = !!(call_used_regs
[i
] & c_mask
);
497 /* Calculate registers of CLOBBERED_REGS register set
498 as call used registers from GENERAL_REGS register set. */
499 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
500 && call_used_regs
[i
])
501 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
504 /* If MMX is disabled, disable the registers. */
506 accessible_reg_set
&= ~reg_class_contents
[MMX_REGS
];
508 /* If SSE is disabled, disable the registers. */
510 accessible_reg_set
&= ~reg_class_contents
[ALL_SSE_REGS
];
512 /* If the FPU is disabled, disable the registers. */
513 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
514 accessible_reg_set
&= ~reg_class_contents
[FLOAT_REGS
];
516 /* If AVX512F is disabled, disable the registers. */
517 if (! TARGET_AVX512F
)
519 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
520 CLEAR_HARD_REG_BIT (accessible_reg_set
, i
);
/* Without AVX512F the mask registers are unavailable too.  */
522 accessible_reg_set
&= ~reg_class_contents
[ALL_MASK_REGS
];
526 /* Canonicalize a comparison from one we don't have to one we do have. */
/* Arguments: CODE is the comparison code (in/out), OP0/OP1 the two
   operands (in/out), OP0_PRESERVE_VALUE forbids swapping when set.  */
529 ix86_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
530 bool op0_preserve_value
)
532 /* The order of operands in x87 ficom compare is forced by combine in
533 simplify_comparison () function. Float operator is treated as RTX_OBJ
534 with a precedence over other operators and is always put in the first
535 place. Swap condition and operands to match ficom instruction. */
536 if (!op0_preserve_value
537 && GET_CODE (*op0
) == FLOAT
&& MEM_P (XEXP (*op0
, 0)) && REG_P (*op1
))
539 enum rtx_code scode
= swap_condition ((enum rtx_code
) *code
);
541 /* We are called only for compares that are split to SAHF instruction.
542 Ensure that we have setcc/jcc insn for the swapped condition. */
543 if (ix86_fp_compare_code_to_integer (scode
) != UNKNOWN
)
/* Swap the operands; the corresponding *code update is elided in
   this excerpt.  */
545 std::swap (*op0
, *op1
);
552 /* Hook to determine if one function can safely inline another. */
/* Compares the target-specific options of CALLER and CALLEE.  The
   true/false return statements for the individual branches are elided
   in this excerpt.  */
555 ix86_can_inline_p (tree caller
, tree callee
)
557 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
558 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
560 /* Changes of those flags can be tolerated for always inlines. Lets hope
561 user knows what he is doing. */
562 unsigned HOST_WIDE_INT always_inline_safe_mask
563 = (MASK_USE_8BIT_IDIV
| MASK_ACCUMULATE_OUTGOING_ARGS
564 | MASK_NO_ALIGN_STRINGOPS
| MASK_AVX256_SPLIT_UNALIGNED_LOAD
565 | MASK_AVX256_SPLIT_UNALIGNED_STORE
| MASK_CLD
566 | MASK_NO_FANCY_MATH_387
| MASK_IEEE_FP
| MASK_INLINE_ALL_STRINGOPS
567 | MASK_INLINE_STRINGOPS_DYNAMICALLY
| MASK_RECIP
| MASK_STACK_PROBE
568 | MASK_STV
| MASK_TLS_DIRECT_SEG_REFS
| MASK_VZEROUPPER
569 | MASK_NO_PUSH_ARGS
| MASK_OMIT_LEAF_FRAME_POINTER
);
/* Fall back to the default target options when a function carries no
   specific target node.  */
573 callee_tree
= target_option_default_node
;
575 caller_tree
= target_option_default_node
;
576 if (callee_tree
== caller_tree
)
579 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
580 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
583 = (DECL_DISREGARD_INLINE_LIMITS (callee
)
584 && lookup_attribute ("always_inline",
585 DECL_ATTRIBUTES (callee
)));
587 /* If callee only uses GPRs, ignore MASK_80387. */
588 if (TARGET_GENERAL_REGS_ONLY_P (callee_opts
->x_ix86_target_flags
))
589 always_inline_safe_mask
|= MASK_80387
;
591 cgraph_node
*callee_node
= cgraph_node::get (callee
);
592 /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
593 function can inline a SSE2 function but a SSE2 function can't inline
a SSE4 function.  */
595 if (((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
596 != callee_opts
->x_ix86_isa_flags
)
597 || ((caller_opts
->x_ix86_isa_flags2
& callee_opts
->x_ix86_isa_flags2
)
598 != callee_opts
->x_ix86_isa_flags2
))
601 /* See if we have the same non-isa options. */
602 else if ((!always_inline
603 && caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
604 || (caller_opts
->x_target_flags
& ~always_inline_safe_mask
)
605 != (callee_opts
->x_target_flags
& ~always_inline_safe_mask
))
608 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
609 /* If the callee doesn't use FP expressions differences in
610 ix86_fpmath can be ignored. We are called from FEs
611 for multi-versioning call optimization, so beware of
612 ipa_fn_summaries not available. */
613 && (! ipa_fn_summaries
614 || ipa_fn_summaries
->get (callee_node
) == NULL
615 || ipa_fn_summaries
->get (callee_node
)->fp_expressions
))
618 /* At this point we cannot identify whether arch or tune setting
619 comes from target attribute or not. So the most conservative way
620 is to allow the callee that uses default arch and tune string to
be inlined.  */
622 else if (!strcmp (callee_opts
->x_ix86_arch_string
, "x86-64")
623 && !strcmp (callee_opts
->x_ix86_tune_string
, "generic"))
626 /* See if arch, tune, etc. are the same. As previous ISA flags already
627 checks if callee's ISA is subset of caller's, do not block
628 always_inline attribute for callee even it has different arch. */
629 else if (!always_inline
&& caller_opts
->arch
!= callee_opts
->arch
)
632 else if (!always_inline
&& caller_opts
->tune
!= callee_opts
->tune
)
635 else if (!always_inline
636 && caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
645 /* Return true if this goes in large data/bss. */
/* EXP is a decl or NULL_TREE; only applies under the medium code
   models, where data above ix86_section_threshold lives in .ldata or
   .lbss.  The individual return statements are elided in this
   excerpt.  */
648 ix86_in_large_data_p (tree exp
)
650 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
653 if (exp
== NULL_TREE
)
656 /* Functions are never large data. */
657 if (TREE_CODE (exp
) == FUNCTION_DECL
)
660 /* Automatic variables are never large data. */
661 if (VAR_P (exp
) && !is_global_var (exp
))
/* An explicit .ldata/.lbss section name forces large data.  */
664 if (VAR_P (exp
) && DECL_SECTION_NAME (exp
))
666 const char *section
= DECL_SECTION_NAME (exp
);
667 if (strcmp (section
, ".ldata") == 0
668 || strcmp (section
, ".lbss") == 0)
674 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
676 /* If this is an incomplete type with size 0, then we can't put it
677 in data because it might be too big when completed. Also,
678 int_size_in_bytes returns -1 if size can vary or is larger than
679 an integer in which case also it is safer to assume that it goes in
large data.  */
681 if (size
<= 0 || size
> ix86_section_threshold
)
688 /* i386-specific section flag to mark large sections. */
689 #define SECTION_LARGE SECTION_MACH_DEP
691 /* Switch to the appropriate section for output of DECL.
692 DECL is either a `VAR_DECL' node or a constant of some sort.
693 RELOC indicates whether forming the initial value of DECL requires
694 link-time relocations. */
/* Large-model decls are routed to .ldata*/.lbss-style sections;
   everything else falls through to the generic ELF selector.  Several
   case labels and break statements of the switch are elided in this
   excerpt.  */
696 ATTRIBUTE_UNUSED
static section
*
697 x86_64_elf_select_section (tree decl
, int reloc
,
698 unsigned HOST_WIDE_INT align
)
700 if (ix86_in_large_data_p (decl
))
702 const char *sname
= NULL
;
703 unsigned int flags
= SECTION_WRITE
| SECTION_LARGE
;
704 switch (categorize_decl_for_section (decl
, reloc
))
709 case SECCAT_DATA_REL
:
710 sname
= ".ldata.rel";
712 case SECCAT_DATA_REL_LOCAL
:
713 sname
= ".ldata.rel.local";
715 case SECCAT_DATA_REL_RO
:
716 sname
= ".ldata.rel.ro";
718 case SECCAT_DATA_REL_RO_LOCAL
:
719 sname
= ".ldata.rel.ro.local";
723 flags
|= SECTION_BSS
;
726 case SECCAT_RODATA_MERGE_STR
:
727 case SECCAT_RODATA_MERGE_STR_INIT
:
728 case SECCAT_RODATA_MERGE_CONST
:
730 flags
&= ~SECTION_WRITE
;
739 /* We don't split these for medium model. Place them into
740 default sections and hope for best. */
745 /* We might get called with string constants, but get_named_section
746 doesn't like them as they are not DECLs. Also, we need to set
747 flags in that case. */
749 return get_section (sname
, flags
, NULL
);
750 return get_named_section (decl
, sname
, reloc
);
753 return default_elf_select_section (decl
, reloc
, align
);
756 /* Select a set of attributes for section NAME based on the properties
757 of DECL and whether or not RELOC indicates that DECL's initializer
758 might contain runtime relocations. */
/* Extends the default flags with SECTION_LARGE for large-model data
   and with RELRO/BSS flags for the .ldata/.lbss family of sections.
   The final return of FLAGS is elided in this excerpt.  */
760 static unsigned int ATTRIBUTE_UNUSED
761 x86_64_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
763 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
765 if (ix86_in_large_data_p (decl
))
766 flags
|= SECTION_LARGE
;
768 if (decl
== NULL_TREE
769 && (strcmp (name
, ".ldata.rel.ro") == 0
770 || strcmp (name
, ".ldata.rel.ro.local") == 0))
771 flags
|= SECTION_RELRO
;
773 if (strcmp (name
, ".lbss") == 0
774 || startswith (name
, ".lbss.")
775 || startswith (name
, ".gnu.linkonce.lb."))
776 flags
|= SECTION_BSS
;
781 /* Build up a unique section name, expressed as a
782 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
783 RELOC indicates whether the initial value of EXP requires
784 link-time relocations. */
/* For large-model decls the per-decl section gets a .ld/.ldata,
   .lb/.lbss or .lr/.lrodata prefix (one_only selects the short,
   link-once form); otherwise the default unique-section logic is
   used.  Some case labels and break statements are elided in this
   excerpt.  */
786 static void ATTRIBUTE_UNUSED
787 x86_64_elf_unique_section (tree decl
, int reloc
)
789 if (ix86_in_large_data_p (decl
))
791 const char *prefix
= NULL
;
792 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
793 bool one_only
= DECL_COMDAT_GROUP (decl
) && !HAVE_COMDAT_GROUP
;
795 switch (categorize_decl_for_section (decl
, reloc
))
798 case SECCAT_DATA_REL
:
799 case SECCAT_DATA_REL_LOCAL
:
800 case SECCAT_DATA_REL_RO
:
801 case SECCAT_DATA_REL_RO_LOCAL
:
802 prefix
= one_only
? ".ld" : ".ldata";
805 prefix
= one_only
? ".lb" : ".lbss";
808 case SECCAT_RODATA_MERGE_STR
:
809 case SECCAT_RODATA_MERGE_STR_INIT
:
810 case SECCAT_RODATA_MERGE_CONST
:
811 prefix
= one_only
? ".lr" : ".lrodata";
820 /* We don't split these for medium model. Place them into
821 default sections and hope for best. */
826 const char *name
, *linkonce
;
829 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
830 name
= targetm
.strip_name_encoding (name
);
832 /* If we're using one_only, then there needs to be a .gnu.linkonce
833 prefix to the section name. */
834 linkonce
= one_only
? ".gnu.linkonce" : "";
836 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
838 set_decl_section_name (decl
, string
);
/* Non-large decls take the generic path.  */
842 default_unique_section (decl
, reloc
);
847 #ifndef LARGECOMM_SECTION_ASM_OP
848 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
851 /* This says how to output assembler code to declare an
852 uninitialized external linkage data object.
854 For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for
large objects.  */
/* Emits either .largecomm (medium model, object above the section
   threshold) or the ordinary COMMON_ASM_OP, followed by
   "name,size,align".  The #endif for the guard above and the ALIGN
   parameter declaration are elided in this excerpt.  */
857 x86_elf_aligned_decl_common (FILE *file
, tree decl
,
858 const char *name
, unsigned HOST_WIDE_INT size
,
861 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
862 && size
> (unsigned int)ix86_section_threshold
)
864 switch_to_section (get_named_section (decl
, ".lbss", 0));
865 fputs (LARGECOMM_SECTION_ASM_OP
, file
);
868 fputs (COMMON_ASM_OP
, file
);
869 assemble_name (file
, name
);
870 fprintf (file
, "," HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
871 size
, align
/ BITS_PER_UNIT
);
875 /* Utility function for targets to use in implementing
876 ASM_OUTPUT_ALIGNED_BSS. */
/* Switches to .lbss for medium-model objects above the section
   threshold, otherwise to the regular bss section, then emits the
   alignment, the object label (or a plain label when
   ASM_DECLARE_OBJECT_NAME is unavailable) and the zero-fill skip.  */
879 x86_output_aligned_bss (FILE *file
, tree decl
, const char *name
,
880 unsigned HOST_WIDE_INT size
, unsigned align
)
882 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
883 && size
> (unsigned int)ix86_section_threshold
)
884 switch_to_section (get_named_section (decl
, ".lbss", 0));
886 switch_to_section (bss_section
);
887 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
888 #ifdef ASM_DECLARE_OBJECT_NAME
889 last_assemble_variable_decl
= decl
;
890 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
/* NOTE(review): the #else branch separator is elided in this
   excerpt.  */
892 /* Standard thing is just output label for the object. */
893 ASM_OUTPUT_LABEL (file
, name
);
894 #endif /* ASM_DECLARE_OBJECT_NAME */
895 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
898 /* Decide whether we must probe the stack before any space allocation
899 on this target. It's essentially TARGET_STACK_PROBE except when
900 -fstack-check causes the stack to be already probed differently. */
903 ix86_target_stack_probe (void)
905 /* Do not probe the stack twice if static stack checking is enabled. */
/* The early "return false" for that case is elided in this excerpt.  */
906 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
909 return TARGET_STACK_PROBE
;
912 /* Decide whether we can make a sibling call to a function. DECL is the
913 declaration of the function being targeted by the call and EXP is the
914 CALL_EXPR representing the call. */
/* Each disqualifying condition below is followed by an elided
   "return false" in this excerpt; the final "return true" is elided
   as well.  */
917 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
919 tree type
, decl_or_type
;
921 bool bind_global
= decl
&& !targetm
.binds_local_p (decl
);
/* Naked functions manage their own prologue/epilogue; no sibcalls.  */
923 if (ix86_function_naked (current_function_decl
))
926 /* Sibling call isn't OK if there are no caller-saved registers
927 since all registers must be preserved before return. */
928 if (cfun
->machine
->no_caller_saved_registers
)
931 /* If we are generating position-independent code, we cannot sibcall
932 optimize direct calls to global functions, as the PLT requires
933 %ebx be live. (Darwin does not have a PLT.) */
941 /* If we need to align the outgoing stack, then sibcalling would
942 unalign the stack, which may break the called function. */
943 if (ix86_minimum_incoming_stack_boundary (true)
944 < PREFERRED_STACK_BOUNDARY
)
950 type
= TREE_TYPE (decl
);
954 /* We're looking at the CALL_EXPR, we need the type of the function. */
955 type
= CALL_EXPR_FN (exp
); /* pointer expression */
956 type
= TREE_TYPE (type
); /* pointer type */
957 type
= TREE_TYPE (type
); /* function type */
961 /* If outgoing reg parm stack space changes, we cannot do sibcall. */
962 if ((OUTGOING_REG_PARM_STACK_SPACE (type
)
963 != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl
)))
964 || (REG_PARM_STACK_SPACE (decl_or_type
)
965 != REG_PARM_STACK_SPACE (current_function_decl
)))
967 maybe_complain_about_tail_call (exp
,
968 "inconsistent size of stack space"
969 " allocated for arguments which are"
970 " passed in registers");
974 /* Check that the return value locations are the same. Like
975 if we are returning floats on the 80387 register stack, we cannot
976 make a sibcall from a function that doesn't return a float to a
977 function that does or, conversely, from a function that does
978 return a float to a function that doesn't; the necessary stack
979 adjustment would not be executed. This is also the place we
980 notice differences in the return value ABI. Note that it is ok
981 for one of the functions to have void return type as long as the
982 return value of the other is passed in a register. */
983 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
984 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
986 if (STACK_REG_P (a
) || STACK_REG_P (b
))
988 if (!rtx_equal_p (a
, b
))
991 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
993 else if (!rtx_equal_p (a
, b
))
998 /* The SYSV ABI has more call-clobbered registers;
999 disallow sibcalls from MS to SYSV. */
1000 if (cfun
->machine
->call_abi
== MS_ABI
1001 && ix86_function_type_abi (type
) == SYSV_ABI
)
1006 /* If this call is indirect, we'll need to be able to use a
1007 call-clobbered register for the address of the target function.
1008 Make sure that all such registers are not used for passing
1009 parameters. Note that DLLIMPORT functions and call to global
1010 function via GOT slot are indirect. */
1012 || (bind_global
&& flag_pic
&& !flag_plt
)
1013 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
))
1014 || flag_force_indirect_call
)
1016 /* Check if regparm >= 3 since arg_reg_available is set to
1017 false if regparm == 0. If regparm is 1 or 2, there is
1018 always a call-clobbered register available.
1020 ??? The symbol indirect call doesn't need a call-clobbered
1021 register. But we don't know if this is a symbol indirect
1022 call or not here. */
1023 if (ix86_function_regparm (type
, decl
) >= 3
1024 && !cfun
->machine
->arg_reg_available
)
1029 if (decl
&& ix86_use_pseudo_pic_reg ())
1031 /* When PIC register is used, it must be restored after ifunc
1032 function returns. */
1033 cgraph_node
*node
= cgraph_node::get (decl
);
1034 if (node
&& node
->ifunc_resolver
)
1038 /* Disable sibcall if callee has indirect_return attribute and
1039 caller doesn't since callee will return to the caller's caller
1040 via an indirect jump. */
1041 if (((flag_cf_protection
& (CF_RETURN
| CF_BRANCH
))
1042 == (CF_RETURN
| CF_BRANCH
))
1043 && lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (type
))
1044 && !lookup_attribute ("indirect_return",
1045 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
))))
1048 /* Otherwise okay. That also includes certain types of indirect calls. */
1055 ix86_get_callcvt (const_tree type
)
1057 unsigned int ret
= 0;
1062 return IX86_CALLCVT_CDECL
;
1064 attrs
= TYPE_ATTRIBUTES (type
);
1065 if (attrs
!= NULL_TREE
)
1067 if (lookup_attribute ("cdecl", attrs
))
1068 ret
|= IX86_CALLCVT_CDECL
;
1069 else if (lookup_attribute ("stdcall", attrs
))
1070 ret
|= IX86_CALLCVT_STDCALL
;
1071 else if (lookup_attribute ("fastcall", attrs
))
1072 ret
|= IX86_CALLCVT_FASTCALL
;
1073 else if (lookup_attribute ("thiscall", attrs
))
1074 ret
|= IX86_CALLCVT_THISCALL
;
1076 /* Regparam isn't allowed for thiscall and fastcall. */
1077 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
1079 if (lookup_attribute ("regparm", attrs
))
1080 ret
|= IX86_CALLCVT_REGPARM
;
1081 if (lookup_attribute ("sseregparm", attrs
))
1082 ret
|= IX86_CALLCVT_SSEREGPARM
;
1085 if (IX86_BASE_CALLCVT(ret
) != 0)
1089 is_stdarg
= stdarg_p (type
);
1090 if (TARGET_RTD
&& !is_stdarg
)
1091 return IX86_CALLCVT_STDCALL
| ret
;
1095 || TREE_CODE (type
) != METHOD_TYPE
1096 || ix86_function_type_abi (type
) != MS_ABI
)
1097 return IX86_CALLCVT_CDECL
| ret
;
1099 return IX86_CALLCVT_THISCALL
;
1102 /* Return 0 if the attributes for two types are incompatible, 1 if they
1103 are compatible, and 2 if they are nearly compatible (which causes a
1104 warning to be generated). */
1107 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
1109 unsigned int ccvt1
, ccvt2
;
1111 if (TREE_CODE (type1
) != FUNCTION_TYPE
1112 && TREE_CODE (type1
) != METHOD_TYPE
)
1115 ccvt1
= ix86_get_callcvt (type1
);
1116 ccvt2
= ix86_get_callcvt (type2
);
1119 if (ix86_function_regparm (type1
, NULL
)
1120 != ix86_function_regparm (type2
, NULL
))
1126 /* Return the regparm value for a function with the indicated TYPE and DECL.
1127 DECL may be NULL when calling function indirectly
1128 or considering a libcall. */
1131 ix86_function_regparm (const_tree type
, const_tree decl
)
1138 return (ix86_function_type_abi (type
) == SYSV_ABI
1139 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
1140 ccvt
= ix86_get_callcvt (type
);
1141 regparm
= ix86_regparm
;
1143 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
1145 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
1148 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1152 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
1154 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
1157 /* Use register calling convention for local functions when possible. */
1159 && TREE_CODE (decl
) == FUNCTION_DECL
)
1161 cgraph_node
*target
= cgraph_node::get (decl
);
1163 target
= target
->function_symbol ();
1165 /* Caller and callee must agree on the calling convention, so
1166 checking here just optimize means that with
1167 __attribute__((optimize (...))) caller could use regparm convention
1168 and callee not, or vice versa. Instead look at whether the callee
1169 is optimized or not. */
1170 if (target
&& opt_for_fn (target
->decl
, optimize
)
1171 && !(profile_flag
&& !flag_fentry
))
1173 if (target
->local
&& target
->can_change_signature
)
1175 int local_regparm
, globals
= 0, regno
;
1177 /* Make sure no regparm register is taken by a
1178 fixed register variable. */
1179 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
;
1181 if (fixed_regs
[local_regparm
])
1184 /* We don't want to use regparm(3) for nested functions as
1185 these use a static chain pointer in the third argument. */
1186 if (local_regparm
== 3 && DECL_STATIC_CHAIN (target
->decl
))
1189 /* Save a register for the split stack. */
1190 if (flag_split_stack
)
1192 if (local_regparm
== 3)
1194 else if (local_regparm
== 2
1195 && DECL_STATIC_CHAIN (target
->decl
))
1199 /* Each fixed register usage increases register pressure,
1200 so less registers should be used for argument passing.
1201 This functionality can be overriden by an explicit
1203 for (regno
= AX_REG
; regno
<= DI_REG
; regno
++)
1204 if (fixed_regs
[regno
])
1208 = globals
< local_regparm
? local_regparm
- globals
: 0;
1210 if (local_regparm
> regparm
)
1211 regparm
= local_regparm
;
1219 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
1220 DFmode (2) arguments in SSE registers for a function with the
1221 indicated TYPE and DECL. DECL may be NULL when calling function
1222 indirectly or considering a libcall. Return -1 if any FP parameter
1223 should be rejected by error. This is used in siutation we imply SSE
1224 calling convetion but the function is called from another function with
1225 SSE disabled. Otherwise return 0. */
1228 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
1230 gcc_assert (!TARGET_64BIT
);
1232 /* Use SSE registers to pass SFmode and DFmode arguments if requested
1233 by the sseregparm attribute. */
1234 if (TARGET_SSEREGPARM
1235 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
1242 error ("calling %qD with attribute sseregparm without "
1243 "SSE/SSE2 enabled", decl
);
1245 error ("calling %qT with attribute sseregparm without "
1246 "SSE/SSE2 enabled", type
);
1257 cgraph_node
*target
= cgraph_node::get (decl
);
1259 target
= target
->function_symbol ();
1261 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1262 (and DFmode for SSE2) arguments in SSE registers. */
1264 /* TARGET_SSE_MATH */
1265 && (target_opts_for_fn (target
->decl
)->x_ix86_fpmath
& FPMATH_SSE
)
1266 && opt_for_fn (target
->decl
, optimize
)
1267 && !(profile_flag
&& !flag_fentry
))
1269 if (target
->local
&& target
->can_change_signature
)
1271 /* Refuse to produce wrong code when local function with SSE enabled
1272 is called from SSE disabled function.
1273 FIXME: We need a way to detect these cases cross-ltrans partition
1274 and avoid using SSE calling conventions on local functions called
1275 from function with SSE disabled. For now at least delay the
1276 warning until we know we are going to produce wrong code.
1278 if (!TARGET_SSE
&& warn
)
1280 return TARGET_SSE2_P (target_opts_for_fn (target
->decl
)
1281 ->x_ix86_isa_flags
) ? 2 : 1;
1288 /* Return true if EAX is live at the start of the function. Used by
1289 ix86_expand_prologue to determine if we need special help before
1290 calling allocate_stack_worker. */
1293 ix86_eax_live_at_start_p (void)
1295 /* Cheat. Don't bother working forward from ix86_function_regparm
1296 to the function type to whether an actual argument is located in
1297 eax. Instead just look at cfg info, which is still close enough
1298 to correct at this point. This gives false positives for broken
1299 functions that might use uninitialized data that happens to be
1300 allocated in eax, but who cares? */
1301 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 0);
1305 ix86_keep_aggregate_return_pointer (tree fntype
)
1311 attr
= lookup_attribute ("callee_pop_aggregate_return",
1312 TYPE_ATTRIBUTES (fntype
));
1314 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
1316 /* For 32-bit MS-ABI the default is to keep aggregate
1318 if (ix86_function_type_abi (fntype
) == MS_ABI
)
1321 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
1324 /* Value is the number of bytes of arguments automatically
1325 popped when returning from a subroutine call.
1326 FUNDECL is the declaration node of the function (as a tree),
1327 FUNTYPE is the data type of the function (as a tree),
1328 or for a library call it is an identifier node for the subroutine name.
1329 SIZE is the number of bytes of arguments passed on the stack.
1331 On the 80386, the RTD insn may be used to pop them if the number
1332 of args is fixed, but if the number is variable then the caller
1333 must pop them all. RTD can't be used for library calls now
1334 because the library is compiled with the Unix compiler.
1335 Use of RTD is a selectable option, since it is incompatible with
1336 standard Unix calling sequences. If the option is not selected,
1337 the caller must always pop the args.
1339 The attribute stdcall is equivalent to RTD on a per module basis. */
1342 ix86_return_pops_args (tree fundecl
, tree funtype
, poly_int64 size
)
1346 /* None of the 64-bit ABIs pop arguments. */
1350 ccvt
= ix86_get_callcvt (funtype
);
1352 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
1353 | IX86_CALLCVT_THISCALL
)) != 0
1354 && ! stdarg_p (funtype
))
1357 /* Lose any fake structure return argument if it is passed on the stack. */
1358 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
1359 && !ix86_keep_aggregate_return_pointer (funtype
))
1361 int nregs
= ix86_function_regparm (funtype
, fundecl
);
1363 return GET_MODE_SIZE (Pmode
);
1369 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1372 ix86_legitimate_combined_insn (rtx_insn
*insn
)
1376 /* Check operand constraints in case hard registers were propagated
1377 into insn pattern. This check prevents combine pass from
1378 generating insn patterns with invalid hard register operands.
1379 These invalid insns can eventually confuse reload to error out
1380 with a spill failure. See also PRs 46829 and 46843. */
1382 gcc_assert (INSN_CODE (insn
) >= 0);
1384 extract_insn (insn
);
1385 preprocess_constraints (insn
);
1387 int n_operands
= recog_data
.n_operands
;
1388 int n_alternatives
= recog_data
.n_alternatives
;
1389 for (i
= 0; i
< n_operands
; i
++)
1391 rtx op
= recog_data
.operand
[i
];
1392 machine_mode mode
= GET_MODE (op
);
1393 const operand_alternative
*op_alt
;
1398 /* A unary operator may be accepted by the predicate, but it
1399 is irrelevant for matching constraints. */
1405 if (REG_P (SUBREG_REG (op
))
1406 && REGNO (SUBREG_REG (op
)) < FIRST_PSEUDO_REGISTER
)
1407 offset
= subreg_regno_offset (REGNO (SUBREG_REG (op
)),
1408 GET_MODE (SUBREG_REG (op
)),
1411 op
= SUBREG_REG (op
);
1414 if (!(REG_P (op
) && HARD_REGISTER_P (op
)))
1417 op_alt
= recog_op_alt
;
1419 /* Operand has no constraints, anything is OK. */
1420 win
= !n_alternatives
;
1422 alternative_mask preferred
= get_preferred_alternatives (insn
);
1423 for (j
= 0; j
< n_alternatives
; j
++, op_alt
+= n_operands
)
1425 if (!TEST_BIT (preferred
, j
))
1427 if (op_alt
[i
].anything_ok
1428 || (op_alt
[i
].matches
!= -1
1430 (recog_data
.operand
[i
],
1431 recog_data
.operand
[op_alt
[i
].matches
]))
1432 || reg_fits_class_p (op
, op_alt
[i
].cl
, offset
, mode
))
1446 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1448 static unsigned HOST_WIDE_INT
1449 ix86_asan_shadow_offset (void)
1451 return SUBTARGET_SHADOW_OFFSET
;
1454 /* Argument support functions. */
1456 /* Return true when register may be used to pass function parameters. */
1458 ix86_function_arg_regno_p (int regno
)
1461 enum calling_abi call_abi
;
1462 const int *parm_regs
;
1464 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
1465 && regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)
1469 return (regno
< REGPARM_MAX
1470 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
1471 && regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
));
1473 /* TODO: The function should depend on current function ABI but
1474 builtins.cc would need updating then. Therefore we use the
1476 call_abi
= ix86_cfun_abi ();
1478 /* RAX is used as hidden argument to va_arg functions. */
1479 if (call_abi
== SYSV_ABI
&& regno
== AX_REG
)
1482 if (call_abi
== MS_ABI
)
1483 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
1485 parm_regs
= x86_64_int_parameter_registers
;
1487 for (i
= 0; i
< (call_abi
== MS_ABI
1488 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
1489 if (regno
== parm_regs
[i
])
1494 /* Return if we do not know how to pass ARG solely in registers. */
1497 ix86_must_pass_in_stack (const function_arg_info
&arg
)
1499 if (must_pass_in_stack_var_size_or_pad (arg
))
1502 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1503 The layout_type routine is crafty and tries to trick us into passing
1504 currently unsupported vector types on the stack by using TImode. */
1505 return (!TARGET_64BIT
&& arg
.mode
== TImode
1506 && arg
.type
&& TREE_CODE (arg
.type
) != VECTOR_TYPE
);
1509 /* It returns the size, in bytes, of the area reserved for arguments passed
1510 in registers for the function represented by fndecl dependent to the used
1513 ix86_reg_parm_stack_space (const_tree fndecl
)
1515 enum calling_abi call_abi
= SYSV_ABI
;
1516 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
1517 call_abi
= ix86_function_abi (fndecl
);
1519 call_abi
= ix86_function_type_abi (fndecl
);
1520 if (TARGET_64BIT
&& call_abi
== MS_ABI
)
1525 /* We add this as a workaround in order to use libc_has_function
1528 ix86_libc_has_function (enum function_class fn_class
)
1530 return targetm
.libc_has_function (fn_class
, NULL_TREE
);
1533 /* Returns value SYSV_ABI, MS_ABI dependent on fntype,
1534 specifying the call abi used. */
1536 ix86_function_type_abi (const_tree fntype
)
1538 enum calling_abi abi
= ix86_abi
;
1540 if (fntype
== NULL_TREE
|| TYPE_ATTRIBUTES (fntype
) == NULL_TREE
)
1544 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
1547 if (TARGET_X32
&& !warned
)
1549 error ("X32 does not support %<ms_abi%> attribute");
1555 else if (abi
== MS_ABI
1556 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
1563 ix86_function_abi (const_tree fndecl
)
1565 return fndecl
? ix86_function_type_abi (TREE_TYPE (fndecl
)) : ix86_abi
;
1568 /* Returns value SYSV_ABI, MS_ABI dependent on cfun,
1569 specifying the call abi used. */
1571 ix86_cfun_abi (void)
1573 return cfun
? cfun
->machine
->call_abi
: ix86_abi
;
1577 ix86_function_ms_hook_prologue (const_tree fn
)
1579 if (fn
&& lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn
)))
1581 if (decl_function_context (fn
) != NULL_TREE
)
1582 error_at (DECL_SOURCE_LOCATION (fn
),
1583 "%<ms_hook_prologue%> attribute is not compatible "
1584 "with nested function");
1592 ix86_function_naked (const_tree fn
)
1594 if (fn
&& lookup_attribute ("naked", DECL_ATTRIBUTES (fn
)))
1600 /* Write the extra assembler code needed to declare a function properly. */
1603 ix86_asm_output_function_label (FILE *out_file
, const char *fname
,
1606 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
1609 cfun
->machine
->function_label_emitted
= true;
1613 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
1614 unsigned int filler_cc
= 0xcccccccc;
1616 for (i
= 0; i
< filler_count
; i
+= 4)
1617 fprintf (out_file
, ASM_LONG
" %#x\n", filler_cc
);
1620 #ifdef SUBTARGET_ASM_UNWIND_INIT
1621 SUBTARGET_ASM_UNWIND_INIT (out_file
);
1624 ASM_OUTPUT_LABEL (out_file
, fname
);
1626 /* Output magic byte marker, if hot-patch attribute is set. */
1631 /* leaq [%rsp + 0], %rsp */
1632 fputs (ASM_BYTE
"0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
1637 /* movl.s %edi, %edi
1639 movl.s %esp, %ebp */
1640 fputs (ASM_BYTE
"0x8b, 0xff, 0x55, 0x8b, 0xec\n", out_file
);
1645 /* Implementation of call abi switching target hook. Specific to FNDECL
1646 the specific call register sets are set. See also
1647 ix86_conditional_register_usage for more details. */
1649 ix86_call_abi_override (const_tree fndecl
)
1651 cfun
->machine
->call_abi
= ix86_function_abi (fndecl
);
1654 /* Return 1 if pseudo register should be created and used to hold
1655 GOT address for PIC code. */
1657 ix86_use_pseudo_pic_reg (void)
1660 && (ix86_cmodel
== CM_SMALL_PIC
1667 /* Initialize large model PIC register. */
1670 ix86_init_large_pic_reg (unsigned int tmp_regno
)
1672 rtx_code_label
*label
;
1675 gcc_assert (Pmode
== DImode
);
1676 label
= gen_label_rtx ();
1678 LABEL_PRESERVE_P (label
) = 1;
1679 tmp_reg
= gen_rtx_REG (Pmode
, tmp_regno
);
1680 gcc_assert (REGNO (pic_offset_table_rtx
) != tmp_regno
);
1681 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
,
1683 emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
1684 emit_insn (gen_add2_insn (pic_offset_table_rtx
, tmp_reg
));
1685 const char *name
= LABEL_NAME (label
);
1686 PUT_CODE (label
, NOTE
);
1687 NOTE_KIND (label
) = NOTE_INSN_DELETED_LABEL
;
1688 NOTE_DELETED_LABEL_NAME (label
) = name
;
1691 /* Create and initialize PIC register if required. */
1693 ix86_init_pic_reg (void)
1698 if (!ix86_use_pseudo_pic_reg ())
1705 if (ix86_cmodel
== CM_LARGE_PIC
)
1706 ix86_init_large_pic_reg (R11_REG
);
1708 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
1712 /* If there is future mcount call in the function it is more profitable
1713 to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */
1714 rtx reg
= crtl
->profile
1715 ? gen_rtx_REG (Pmode
, REAL_PIC_OFFSET_TABLE_REGNUM
)
1716 : pic_offset_table_rtx
;
1717 rtx_insn
*insn
= emit_insn (gen_set_got (reg
));
1718 RTX_FRAME_RELATED_P (insn
) = 1;
1720 emit_move_insn (pic_offset_table_rtx
, reg
);
1721 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
1727 entry_edge
= single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
1728 insert_insn_on_edge (seq
, entry_edge
);
1729 commit_one_edge_insertion (entry_edge
);
1732 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1733 for a call to a function whose data type is FNTYPE.
1734 For a library call, FNTYPE is 0. */
1737 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
1738 tree fntype
, /* tree ptr for function decl */
1739 rtx libname
, /* SYMBOL_REF of library name or 0 */
1743 struct cgraph_node
*local_info_node
= NULL
;
1744 struct cgraph_node
*target
= NULL
;
1746 /* Set silent_p to false to raise an error for invalid calls when
1747 expanding function body. */
1748 cfun
->machine
->silent_p
= false;
1750 memset (cum
, 0, sizeof (*cum
));
1754 target
= cgraph_node::get (fndecl
);
1757 target
= target
->function_symbol ();
1758 local_info_node
= cgraph_node::local_info_node (target
->decl
);
1759 cum
->call_abi
= ix86_function_abi (target
->decl
);
1762 cum
->call_abi
= ix86_function_abi (fndecl
);
1765 cum
->call_abi
= ix86_function_type_abi (fntype
);
1767 cum
->caller
= caller
;
1769 /* Set up the number of registers to use for passing arguments. */
1770 cum
->nregs
= ix86_regparm
;
1773 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
1774 ? X86_64_REGPARM_MAX
1775 : X86_64_MS_REGPARM_MAX
);
1779 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1782 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
1783 ? X86_64_SSE_REGPARM_MAX
1784 : X86_64_MS_SSE_REGPARM_MAX
);
1788 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
1789 cum
->warn_avx512f
= true;
1790 cum
->warn_avx
= true;
1791 cum
->warn_sse
= true;
1792 cum
->warn_mmx
= true;
1794 /* Because type might mismatch in between caller and callee, we need to
1795 use actual type of function for local calls.
1796 FIXME: cgraph_analyze can be told to actually record if function uses
1797 va_start so for local functions maybe_vaarg can be made aggressive
1799 FIXME: once typesytem is fixed, we won't need this code anymore. */
1800 if (local_info_node
&& local_info_node
->local
1801 && local_info_node
->can_change_signature
)
1802 fntype
= TREE_TYPE (target
->decl
);
1803 cum
->stdarg
= stdarg_p (fntype
);
1804 cum
->maybe_vaarg
= (fntype
1805 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
1810 cum
->warn_empty
= !warn_abi
|| cum
->stdarg
;
1811 if (!cum
->warn_empty
&& fntype
)
1813 function_args_iterator iter
;
1815 bool seen_empty_type
= false;
1816 FOREACH_FUNCTION_ARGS (fntype
, argtype
, iter
)
1818 if (argtype
== error_mark_node
|| VOID_TYPE_P (argtype
))
1820 if (TYPE_EMPTY_P (argtype
))
1821 seen_empty_type
= true;
1822 else if (seen_empty_type
)
1824 cum
->warn_empty
= true;
1832 /* If there are variable arguments, then we won't pass anything
1833 in registers in 32-bit mode. */
1834 if (stdarg_p (fntype
))
1837 /* Since in 32-bit, variable arguments are always passed on
1838 stack, there is scratch register available for indirect
1840 cfun
->machine
->arg_reg_available
= true;
1843 cum
->warn_avx512f
= false;
1844 cum
->warn_avx
= false;
1845 cum
->warn_sse
= false;
1846 cum
->warn_mmx
= false;
1850 /* Use ecx and edx registers if function has fastcall attribute,
1851 else look for regparm information. */
1854 unsigned int ccvt
= ix86_get_callcvt (fntype
);
1855 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
1858 cum
->fastcall
= 1; /* Same first register as in fastcall. */
1860 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
1866 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
1869 /* Set up the number of SSE registers used for passing SFmode
1870 and DFmode arguments. Warn for mismatching ABI. */
1871 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
1874 cfun
->machine
->arg_reg_available
= (cum
->nregs
> 0);
1877 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
1878 But in the case of vector types, it is some vector mode.
1880 When we have only some of our vector isa extensions enabled, then there
1881 are some modes for which vector_mode_supported_p is false. For these
1882 modes, the generic vector support in gcc will choose some non-vector mode
1883 in order to implement the type. By computing the natural mode, we'll
1884 select the proper ABI location for the operand and not depend on whatever
1885 the middle-end decides to do with these vector types.
1887 The midde-end can't deal with the vector types > 16 bytes. In this
1888 case, we return the original mode and warn ABI change if CUM isn't
1891 If INT_RETURN is true, warn ABI change if the vector mode isn't
1892 available for function return value. */
1895 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
,
1898 machine_mode mode
= TYPE_MODE (type
);
1900 if (VECTOR_TYPE_P (type
) && !VECTOR_MODE_P (mode
))
1902 HOST_WIDE_INT size
= int_size_in_bytes (type
);
1903 if ((size
== 8 || size
== 16 || size
== 32 || size
== 64)
1904 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
1905 && TYPE_VECTOR_SUBPARTS (type
) > 1)
1907 machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
1909 /* There are no XFmode vector modes ... */
1910 if (innermode
== XFmode
)
1913 /* ... and no decimal float vector modes. */
1914 if (DECIMAL_FLOAT_MODE_P (innermode
))
1917 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type
)))
1918 mode
= MIN_MODE_VECTOR_FLOAT
;
1920 mode
= MIN_MODE_VECTOR_INT
;
1922 /* Get the mode which has this inner mode and number of units. */
1923 FOR_EACH_MODE_FROM (mode
, mode
)
1924 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
1925 && GET_MODE_INNER (mode
) == innermode
)
1927 if (size
== 64 && !TARGET_AVX512F
&& !TARGET_IAMCU
)
1929 static bool warnedavx512f
;
1930 static bool warnedavx512f_ret
;
1932 if (cum
&& cum
->warn_avx512f
&& !warnedavx512f
)
1934 if (warning (OPT_Wpsabi
, "AVX512F vector argument "
1935 "without AVX512F enabled changes the ABI"))
1936 warnedavx512f
= true;
1938 else if (in_return
&& !warnedavx512f_ret
)
1940 if (warning (OPT_Wpsabi
, "AVX512F vector return "
1941 "without AVX512F enabled changes the ABI"))
1942 warnedavx512f_ret
= true;
1945 return TYPE_MODE (type
);
1947 else if (size
== 32 && !TARGET_AVX
&& !TARGET_IAMCU
)
1949 static bool warnedavx
;
1950 static bool warnedavx_ret
;
1952 if (cum
&& cum
->warn_avx
&& !warnedavx
)
1954 if (warning (OPT_Wpsabi
, "AVX vector argument "
1955 "without AVX enabled changes the ABI"))
1958 else if (in_return
&& !warnedavx_ret
)
1960 if (warning (OPT_Wpsabi
, "AVX vector return "
1961 "without AVX enabled changes the ABI"))
1962 warnedavx_ret
= true;
1965 return TYPE_MODE (type
);
1967 else if (((size
== 8 && TARGET_64BIT
) || size
== 16)
1971 static bool warnedsse
;
1972 static bool warnedsse_ret
;
1974 if (cum
&& cum
->warn_sse
&& !warnedsse
)
1976 if (warning (OPT_Wpsabi
, "SSE vector argument "
1977 "without SSE enabled changes the ABI"))
1980 else if (!TARGET_64BIT
&& in_return
&& !warnedsse_ret
)
1982 if (warning (OPT_Wpsabi
, "SSE vector return "
1983 "without SSE enabled changes the ABI"))
1984 warnedsse_ret
= true;
1987 else if ((size
== 8 && !TARGET_64BIT
)
1989 || cfun
->machine
->func_type
== TYPE_NORMAL
)
1993 static bool warnedmmx
;
1994 static bool warnedmmx_ret
;
1996 if (cum
&& cum
->warn_mmx
&& !warnedmmx
)
1998 if (warning (OPT_Wpsabi
, "MMX vector argument "
1999 "without MMX enabled changes the ABI"))
2002 else if (in_return
&& !warnedmmx_ret
)
2004 if (warning (OPT_Wpsabi
, "MMX vector return "
2005 "without MMX enabled changes the ABI"))
2006 warnedmmx_ret
= true;
2019 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2020 this may not agree with the mode that the type system has chosen for the
2021 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2022 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2025 gen_reg_or_parallel (machine_mode mode
, machine_mode orig_mode
,
2030 if (orig_mode
!= BLKmode
)
2031 tmp
= gen_rtx_REG (orig_mode
, regno
);
2034 tmp
= gen_rtx_REG (mode
, regno
);
2035 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
2036 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
2042 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2043 of this code is to classify each 8bytes of incoming argument by the register
2044 class and assign registers accordingly. */
2046 /* Return the union class of CLASS1 and CLASS2.
2047 See the x86-64 PS ABI for details. */
2049 static enum x86_64_reg_class
2050 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
2052 /* Rule #1: If both classes are equal, this is the resulting class. */
2053 if (class1
== class2
)
2056 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2058 if (class1
== X86_64_NO_CLASS
)
2060 if (class2
== X86_64_NO_CLASS
)
2063 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2064 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
2065 return X86_64_MEMORY_CLASS
;
2067 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2068 if ((class1
== X86_64_INTEGERSI_CLASS
2069 && (class2
== X86_64_SSESF_CLASS
|| class2
== X86_64_SSEHF_CLASS
))
2070 || (class2
== X86_64_INTEGERSI_CLASS
2071 && (class1
== X86_64_SSESF_CLASS
|| class1
== X86_64_SSEHF_CLASS
)))
2072 return X86_64_INTEGERSI_CLASS
;
2073 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
2074 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
2075 return X86_64_INTEGER_CLASS
;
2077 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2079 if (class1
== X86_64_X87_CLASS
2080 || class1
== X86_64_X87UP_CLASS
2081 || class1
== X86_64_COMPLEX_X87_CLASS
2082 || class2
== X86_64_X87_CLASS
2083 || class2
== X86_64_X87UP_CLASS
2084 || class2
== X86_64_COMPLEX_X87_CLASS
)
2085 return X86_64_MEMORY_CLASS
;
2087 /* Rule #6: Otherwise class SSE is used. */
2088 return X86_64_SSE_CLASS
;
2091 /* Classify the argument of type TYPE and mode MODE.
2092 CLASSES will be filled by the register class used to pass each word
2093 of the operand. The number of words is returned. In case the parameter
2094 should be passed in memory, 0 is returned. As a special case for zero
2095 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2097 BIT_OFFSET is used internally for handling records and specifies offset
2098 of the offset in bits modulo 512 to avoid overflow cases.
2100 See the x86-64 PS ABI for details.
2104 classify_argument (machine_mode mode
, const_tree type
,
2105 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
,
2106 int &zero_width_bitfields
)
2109 = mode
== BLKmode
? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2110 int words
= CEIL (bytes
+ (bit_offset
% 64) / 8, UNITS_PER_WORD
);
2112 /* Variable sized entities are always passed/returned in memory. */
2116 if (mode
!= VOIDmode
)
2118 /* The value of "named" doesn't matter. */
2119 function_arg_info
arg (const_cast<tree
> (type
), mode
, /*named=*/true);
2120 if (targetm
.calls
.must_pass_in_stack (arg
))
2124 if (type
&& (AGGREGATE_TYPE_P (type
)
2125 || (TREE_CODE (type
) == BITINT_TYPE
&& words
> 1)))
2129 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
2131 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2135 for (i
= 0; i
< words
; i
++)
2136 classes
[i
] = X86_64_NO_CLASS
;
2138 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2139 signalize memory class, so handle it as special case. */
2142 classes
[0] = X86_64_NO_CLASS
;
2146 /* Classify each field of record and merge classes. */
2147 switch (TREE_CODE (type
))
2150 /* And now merge the fields of structure. */
2151 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
2153 if (TREE_CODE (field
) == FIELD_DECL
)
2157 if (TREE_TYPE (field
) == error_mark_node
)
2160 /* Bitfields are always classified as integer. Handle them
2161 early, since later code would consider them to be
2162 misaligned integers. */
2163 if (DECL_BIT_FIELD (field
))
2165 if (integer_zerop (DECL_SIZE (field
)))
2167 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field
))
2169 if (zero_width_bitfields
!= 2)
2171 zero_width_bitfields
= 1;
2175 for (i
= (int_bit_position (field
)
2176 + (bit_offset
% 64)) / 8 / 8;
2177 i
< ((int_bit_position (field
) + (bit_offset
% 64))
2178 + tree_to_shwi (DECL_SIZE (field
))
2181 = merge_classes (X86_64_INTEGER_CLASS
, classes
[i
]);
2187 type
= TREE_TYPE (field
);
2189 /* Flexible array member is ignored. */
2190 if (TYPE_MODE (type
) == BLKmode
2191 && TREE_CODE (type
) == ARRAY_TYPE
2192 && TYPE_SIZE (type
) == NULL_TREE
2193 && TYPE_DOMAIN (type
) != NULL_TREE
2194 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
2199 if (!warned
&& warn_psabi
)
2202 inform (input_location
,
2203 "the ABI of passing struct with"
2204 " a flexible array member has"
2205 " changed in GCC 4.4");
2209 num
= classify_argument (TYPE_MODE (type
), type
,
2211 (int_bit_position (field
)
2212 + bit_offset
) % 512,
2213 zero_width_bitfields
);
2216 pos
= (int_bit_position (field
)
2217 + (bit_offset
% 64)) / 8 / 8;
2218 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
2220 = merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2227 /* Arrays are handled as small records. */
2230 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
2231 TREE_TYPE (type
), subclasses
, bit_offset
,
2232 zero_width_bitfields
);
2236 /* The partial classes are now full classes. */
2237 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
2238 subclasses
[0] = X86_64_SSE_CLASS
;
2239 if (subclasses
[0] == X86_64_SSEHF_CLASS
&& bytes
!= 2)
2240 subclasses
[0] = X86_64_SSE_CLASS
;
2241 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
2242 && !((bit_offset
% 64) == 0 && bytes
== 4))
2243 subclasses
[0] = X86_64_INTEGER_CLASS
;
2245 for (i
= 0; i
< words
; i
++)
2246 classes
[i
] = subclasses
[i
% num
];
2251 case QUAL_UNION_TYPE
:
2252 /* Unions are similar to RECORD_TYPE but offset is always 0.
2254 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
2256 if (TREE_CODE (field
) == FIELD_DECL
)
2260 if (TREE_TYPE (field
) == error_mark_node
)
2263 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2264 TREE_TYPE (field
), subclasses
,
2265 bit_offset
, zero_width_bitfields
);
2268 for (i
= 0; i
< num
&& i
< words
; i
++)
2269 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
2275 /* _BitInt(N) for N > 64 is passed as structure containing
2276 (N + 63) / 64 64-bit elements. */
2279 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2288 /* When size > 16 bytes, if the first one isn't
2289 X86_64_SSE_CLASS or any other ones aren't
2290 X86_64_SSEUP_CLASS, everything should be passed in
2292 if (classes
[0] != X86_64_SSE_CLASS
)
2295 for (i
= 1; i
< words
; i
++)
2296 if (classes
[i
] != X86_64_SSEUP_CLASS
)
2300 /* Final merger cleanup. */
2301 for (i
= 0; i
< words
; i
++)
2303 /* If one class is MEMORY, everything should be passed in
2305 if (classes
[i
] == X86_64_MEMORY_CLASS
)
2308 /* The X86_64_SSEUP_CLASS should be always preceded by
2309 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2310 if (classes
[i
] == X86_64_SSEUP_CLASS
2311 && classes
[i
- 1] != X86_64_SSE_CLASS
2312 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
2314 /* The first one should never be X86_64_SSEUP_CLASS. */
2315 gcc_assert (i
!= 0);
2316 classes
[i
] = X86_64_SSE_CLASS
;
2319 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2320 everything should be passed in memory. */
2321 if (classes
[i
] == X86_64_X87UP_CLASS
2322 && (classes
[i
- 1] != X86_64_X87_CLASS
))
2326 /* The first one should never be X86_64_X87UP_CLASS. */
2327 gcc_assert (i
!= 0);
2328 if (!warned
&& warn_psabi
)
2331 inform (input_location
,
2332 "the ABI of passing union with %<long double%>"
2333 " has changed in GCC 4.4");
2341 /* Compute alignment needed. We align all types to natural boundaries with
2342 exception of XFmode that is aligned to 64bits. */
2343 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
2345 int mode_alignment
= GET_MODE_BITSIZE (mode
);
2348 mode_alignment
= 128;
2349 else if (mode
== XCmode
)
2350 mode_alignment
= 256;
2351 if (COMPLEX_MODE_P (mode
))
2352 mode_alignment
/= 2;
2353 /* Misaligned fields are always returned in memory. */
2354 if (bit_offset
% mode_alignment
)
2358 /* for V1xx modes, just use the base mode */
2359 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
2360 && GET_MODE_UNIT_SIZE (mode
) == bytes
)
2361 mode
= GET_MODE_INNER (mode
);
2363 /* Classification of atomic types. */
2368 classes
[0] = X86_64_SSE_CLASS
;
2371 classes
[0] = X86_64_SSE_CLASS
;
2372 classes
[1] = X86_64_SSEUP_CLASS
;
2382 int size
= bit_offset
+ (int) GET_MODE_BITSIZE (mode
);
2384 /* Analyze last 128 bits only. */
2385 size
= (size
- 1) & 0x7f;
2389 classes
[0] = X86_64_INTEGERSI_CLASS
;
2394 classes
[0] = X86_64_INTEGER_CLASS
;
2397 else if (size
< 64+32)
2399 classes
[0] = X86_64_INTEGER_CLASS
;
2400 classes
[1] = X86_64_INTEGERSI_CLASS
;
2403 else if (size
< 64+64)
2405 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2413 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2417 /* OImode shouldn't be used directly. */
2423 if (!(bit_offset
% 64))
2424 classes
[0] = X86_64_SSEHF_CLASS
;
2426 classes
[0] = X86_64_SSE_CLASS
;
2429 if (!(bit_offset
% 64))
2430 classes
[0] = X86_64_SSESF_CLASS
;
2432 classes
[0] = X86_64_SSE_CLASS
;
2435 classes
[0] = X86_64_SSEDF_CLASS
;
2438 classes
[0] = X86_64_X87_CLASS
;
2439 classes
[1] = X86_64_X87UP_CLASS
;
2442 classes
[0] = X86_64_SSE_CLASS
;
2443 classes
[1] = X86_64_SSEUP_CLASS
;
2447 classes
[0] = X86_64_SSE_CLASS
;
2448 if (!(bit_offset
% 64))
2452 classes
[1] = X86_64_SSEHF_CLASS
;
2456 classes
[0] = X86_64_SSE_CLASS
;
2457 if (!(bit_offset
% 64))
2463 if (!warned
&& warn_psabi
)
2466 inform (input_location
,
2467 "the ABI of passing structure with %<complex float%>"
2468 " member has changed in GCC 4.4");
2470 classes
[1] = X86_64_SSESF_CLASS
;
2474 classes
[0] = X86_64_SSEDF_CLASS
;
2475 classes
[1] = X86_64_SSEDF_CLASS
;
2478 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
2481 /* This modes is larger than 16 bytes. */
2491 classes
[0] = X86_64_SSE_CLASS
;
2492 classes
[1] = X86_64_SSEUP_CLASS
;
2493 classes
[2] = X86_64_SSEUP_CLASS
;
2494 classes
[3] = X86_64_SSEUP_CLASS
;
2504 classes
[0] = X86_64_SSE_CLASS
;
2505 classes
[1] = X86_64_SSEUP_CLASS
;
2506 classes
[2] = X86_64_SSEUP_CLASS
;
2507 classes
[3] = X86_64_SSEUP_CLASS
;
2508 classes
[4] = X86_64_SSEUP_CLASS
;
2509 classes
[5] = X86_64_SSEUP_CLASS
;
2510 classes
[6] = X86_64_SSEUP_CLASS
;
2511 classes
[7] = X86_64_SSEUP_CLASS
;
2521 classes
[0] = X86_64_SSE_CLASS
;
2522 classes
[1] = X86_64_SSEUP_CLASS
;
2534 classes
[0] = X86_64_SSE_CLASS
;
2540 gcc_assert (VECTOR_MODE_P (mode
));
2545 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
2547 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2548 classes
[0] = X86_64_INTEGERSI_CLASS
;
2550 classes
[0] = X86_64_INTEGER_CLASS
;
2551 classes
[1] = X86_64_INTEGER_CLASS
;
2552 return 1 + (bytes
> 8);
2556 /* Wrapper around classify_argument with the extra zero_width_bitfields
2557 argument, to diagnose GCC 12.1 ABI differences for C. */
2560 classify_argument (machine_mode mode
, const_tree type
,
2561 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
2563 int zero_width_bitfields
= 0;
2564 static bool warned
= false;
2565 int n
= classify_argument (mode
, type
, classes
, bit_offset
,
2566 zero_width_bitfields
);
2567 if (!zero_width_bitfields
|| warned
|| !warn_psabi
)
2569 enum x86_64_reg_class alt_classes
[MAX_CLASSES
];
2570 zero_width_bitfields
= 2;
2571 if (classify_argument (mode
, type
, alt_classes
, bit_offset
,
2572 zero_width_bitfields
) != n
)
2573 zero_width_bitfields
= 3;
2575 for (int i
= 0; i
< n
; i
++)
2576 if (classes
[i
] != alt_classes
[i
])
2578 zero_width_bitfields
= 3;
2581 if (zero_width_bitfields
== 3)
2585 = CHANGES_ROOT_URL
"gcc-12/changes.html#zero_width_bitfields";
2587 inform (input_location
,
2588 "the ABI of passing C structures with zero-width bit-fields"
2589 " has changed in GCC %{12.1%}", url
);
2594 /* Examine the argument and return set number of register required in each
2595 class. Return true iff parameter should be passed in memory. */
2598 examine_argument (machine_mode mode
, const_tree type
, int in_return
,
2599 int *int_nregs
, int *sse_nregs
)
2601 enum x86_64_reg_class regclass
[MAX_CLASSES
];
2602 int n
= classify_argument (mode
, type
, regclass
, 0);
2609 for (n
--; n
>= 0; n
--)
2610 switch (regclass
[n
])
2612 case X86_64_INTEGER_CLASS
:
2613 case X86_64_INTEGERSI_CLASS
:
2616 case X86_64_SSE_CLASS
:
2617 case X86_64_SSEHF_CLASS
:
2618 case X86_64_SSESF_CLASS
:
2619 case X86_64_SSEDF_CLASS
:
2622 case X86_64_NO_CLASS
:
2623 case X86_64_SSEUP_CLASS
:
2625 case X86_64_X87_CLASS
:
2626 case X86_64_X87UP_CLASS
:
2627 case X86_64_COMPLEX_X87_CLASS
:
2631 case X86_64_MEMORY_CLASS
:
2638 /* Construct container for the argument used by GCC interface. See
2639 FUNCTION_ARG for the detailed description. */
2642 construct_container (machine_mode mode
, machine_mode orig_mode
,
2643 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
2644 const int *intreg
, int sse_regno
)
2646 /* The following variables hold the static issued_error state. */
2647 static bool issued_sse_arg_error
;
2648 static bool issued_sse_ret_error
;
2649 static bool issued_x87_ret_error
;
2651 machine_mode tmpmode
;
2653 = mode
== BLKmode
? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2654 enum x86_64_reg_class regclass
[MAX_CLASSES
];
2658 int needed_sseregs
, needed_intregs
;
2659 rtx exp
[MAX_CLASSES
];
2662 n
= classify_argument (mode
, type
, regclass
, 0);
2665 if (examine_argument (mode
, type
, in_return
, &needed_intregs
,
2668 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
2671 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2672 some less clueful developer tries to use floating-point anyway. */
2674 && (!TARGET_SSE
|| (VALID_SSE2_TYPE_MODE (mode
) && !TARGET_SSE2
)))
2676 /* Return early if we shouldn't raise an error for invalid
2678 if (cfun
!= NULL
&& cfun
->machine
->silent_p
)
2682 if (!issued_sse_ret_error
)
2684 if (VALID_SSE2_TYPE_MODE (mode
))
2685 error ("SSE register return with SSE2 disabled");
2687 error ("SSE register return with SSE disabled");
2688 issued_sse_ret_error
= true;
2691 else if (!issued_sse_arg_error
)
2693 if (VALID_SSE2_TYPE_MODE (mode
))
2694 error ("SSE register argument with SSE2 disabled");
2696 error ("SSE register argument with SSE disabled");
2697 issued_sse_arg_error
= true;
2702 /* Likewise, error if the ABI requires us to return values in the
2703 x87 registers and the user specified -mno-80387. */
2704 if (!TARGET_FLOAT_RETURNS_IN_80387
&& in_return
)
2705 for (i
= 0; i
< n
; i
++)
2706 if (regclass
[i
] == X86_64_X87_CLASS
2707 || regclass
[i
] == X86_64_X87UP_CLASS
2708 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
2710 /* Return early if we shouldn't raise an error for invalid
2712 if (cfun
!= NULL
&& cfun
->machine
->silent_p
)
2714 if (!issued_x87_ret_error
)
2716 error ("x87 register return with x87 disabled");
2717 issued_x87_ret_error
= true;
2722 /* First construct simple cases. Avoid SCmode, since we want to use
2723 single register to pass this type. */
2724 if (n
== 1 && mode
!= SCmode
&& mode
!= HCmode
)
2725 switch (regclass
[0])
2727 case X86_64_INTEGER_CLASS
:
2728 case X86_64_INTEGERSI_CLASS
:
2729 return gen_rtx_REG (mode
, intreg
[0]);
2730 case X86_64_SSE_CLASS
:
2731 case X86_64_SSEHF_CLASS
:
2732 case X86_64_SSESF_CLASS
:
2733 case X86_64_SSEDF_CLASS
:
2734 if (mode
!= BLKmode
)
2735 return gen_reg_or_parallel (mode
, orig_mode
,
2736 GET_SSE_REGNO (sse_regno
));
2738 case X86_64_X87_CLASS
:
2739 case X86_64_COMPLEX_X87_CLASS
:
2740 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
2741 case X86_64_NO_CLASS
:
2742 /* Zero sized array, struct or class. */
2748 && regclass
[0] == X86_64_SSE_CLASS
2749 && regclass
[1] == X86_64_SSEUP_CLASS
2751 return gen_reg_or_parallel (mode
, orig_mode
,
2752 GET_SSE_REGNO (sse_regno
));
2754 && regclass
[0] == X86_64_SSE_CLASS
2755 && regclass
[1] == X86_64_SSEUP_CLASS
2756 && regclass
[2] == X86_64_SSEUP_CLASS
2757 && regclass
[3] == X86_64_SSEUP_CLASS
2759 return gen_reg_or_parallel (mode
, orig_mode
,
2760 GET_SSE_REGNO (sse_regno
));
2762 && regclass
[0] == X86_64_SSE_CLASS
2763 && regclass
[1] == X86_64_SSEUP_CLASS
2764 && regclass
[2] == X86_64_SSEUP_CLASS
2765 && regclass
[3] == X86_64_SSEUP_CLASS
2766 && regclass
[4] == X86_64_SSEUP_CLASS
2767 && regclass
[5] == X86_64_SSEUP_CLASS
2768 && regclass
[6] == X86_64_SSEUP_CLASS
2769 && regclass
[7] == X86_64_SSEUP_CLASS
2771 return gen_reg_or_parallel (mode
, orig_mode
,
2772 GET_SSE_REGNO (sse_regno
));
2774 && regclass
[0] == X86_64_X87_CLASS
2775 && regclass
[1] == X86_64_X87UP_CLASS
)
2776 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
2779 && regclass
[0] == X86_64_INTEGER_CLASS
2780 && regclass
[1] == X86_64_INTEGER_CLASS
2781 && (mode
== CDImode
|| mode
== TImode
|| mode
== BLKmode
)
2782 && intreg
[0] + 1 == intreg
[1])
2784 if (mode
== BLKmode
)
2786 /* Use TImode for BLKmode values in 2 integer registers. */
2787 exp
[0] = gen_rtx_EXPR_LIST (VOIDmode
,
2788 gen_rtx_REG (TImode
, intreg
[0]),
2790 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (1));
2791 XVECEXP (ret
, 0, 0) = exp
[0];
2795 return gen_rtx_REG (mode
, intreg
[0]);
2798 /* Otherwise figure out the entries of the PARALLEL. */
2799 for (i
= 0; i
< n
; i
++)
2803 switch (regclass
[i
])
2805 case X86_64_NO_CLASS
:
2807 case X86_64_INTEGER_CLASS
:
2808 case X86_64_INTEGERSI_CLASS
:
2809 /* Merge TImodes on aligned occasions here too. */
2810 if (i
* 8 + 8 > bytes
)
2812 unsigned int tmpbits
= (bytes
- i
* 8) * BITS_PER_UNIT
;
2813 if (!int_mode_for_size (tmpbits
, 0).exists (&tmpmode
))
2814 /* We've requested 24 bytes we
2815 don't have mode for. Use DImode. */
2818 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
2823 = gen_rtx_EXPR_LIST (VOIDmode
,
2824 gen_rtx_REG (tmpmode
, *intreg
),
2828 case X86_64_SSEHF_CLASS
:
2829 tmpmode
= (mode
== BFmode
? BFmode
: HFmode
);
2831 = gen_rtx_EXPR_LIST (VOIDmode
,
2832 gen_rtx_REG (tmpmode
,
2833 GET_SSE_REGNO (sse_regno
)),
2837 case X86_64_SSESF_CLASS
:
2839 = gen_rtx_EXPR_LIST (VOIDmode
,
2840 gen_rtx_REG (SFmode
,
2841 GET_SSE_REGNO (sse_regno
)),
2845 case X86_64_SSEDF_CLASS
:
2847 = gen_rtx_EXPR_LIST (VOIDmode
,
2848 gen_rtx_REG (DFmode
,
2849 GET_SSE_REGNO (sse_regno
)),
2853 case X86_64_SSE_CLASS
:
2861 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
2871 && regclass
[1] == X86_64_SSEUP_CLASS
2872 && regclass
[2] == X86_64_SSEUP_CLASS
2873 && regclass
[3] == X86_64_SSEUP_CLASS
);
2879 && regclass
[1] == X86_64_SSEUP_CLASS
2880 && regclass
[2] == X86_64_SSEUP_CLASS
2881 && regclass
[3] == X86_64_SSEUP_CLASS
2882 && regclass
[4] == X86_64_SSEUP_CLASS
2883 && regclass
[5] == X86_64_SSEUP_CLASS
2884 && regclass
[6] == X86_64_SSEUP_CLASS
2885 && regclass
[7] == X86_64_SSEUP_CLASS
);
2893 = gen_rtx_EXPR_LIST (VOIDmode
,
2894 gen_rtx_REG (tmpmode
,
2895 GET_SSE_REGNO (sse_regno
)),
2904 /* Empty aligned struct, union or class. */
2908 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2909 for (i
= 0; i
< nexps
; i
++)
2910 XVECEXP (ret
, 0, i
) = exp
[i
];
2914 /* Update the data in CUM to advance over an argument of mode MODE
2915 and data type TYPE. (TYPE is null for libcalls where that information
2916 may not be available.)
2918 Return a number of integer regsiters advanced over. */
2921 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, machine_mode mode
,
2922 const_tree type
, HOST_WIDE_INT bytes
,
2923 HOST_WIDE_INT words
)
2926 bool error_p
= false;
2930 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2931 bytes in registers. */
2932 if (!VECTOR_MODE_P (mode
) && bytes
<= 8)
2952 cum
->words
+= words
;
2953 cum
->nregs
-= words
;
2954 cum
->regno
+= words
;
2955 if (cum
->nregs
>= 0)
2957 if (cum
->nregs
<= 0)
2960 cfun
->machine
->arg_reg_available
= false;
2966 /* OImode shouldn't be used directly. */
2970 if (cum
->float_in_sse
== -1)
2972 if (cum
->float_in_sse
< 2)
2976 if (cum
->float_in_sse
== -1)
2978 if (cum
->float_in_sse
< 1)
3007 if (!type
|| !AGGREGATE_TYPE_P (type
))
3009 cum
->sse_words
+= words
;
3010 cum
->sse_nregs
-= 1;
3011 cum
->sse_regno
+= 1;
3012 if (cum
->sse_nregs
<= 0)
3028 if (!type
|| !AGGREGATE_TYPE_P (type
))
3030 cum
->mmx_words
+= words
;
3031 cum
->mmx_nregs
-= 1;
3032 cum
->mmx_regno
+= 1;
3033 if (cum
->mmx_nregs
<= 0)
3043 cum
->float_in_sse
= 0;
3044 error ("calling %qD with SSE calling convention without "
3045 "SSE/SSE2 enabled", cum
->decl
);
3046 sorry ("this is a GCC bug that can be worked around by adding "
3047 "attribute used to function called");
3054 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, machine_mode mode
,
3055 const_tree type
, HOST_WIDE_INT words
, bool named
)
3057 int int_nregs
, sse_nregs
;
3059 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
3060 if (!named
&& (VALID_AVX512F_REG_MODE (mode
)
3061 || VALID_AVX256_REG_MODE (mode
)))
3064 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
3065 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3067 cum
->nregs
-= int_nregs
;
3068 cum
->sse_nregs
-= sse_nregs
;
3069 cum
->regno
+= int_nregs
;
3070 cum
->sse_regno
+= sse_nregs
;
3075 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
3076 cum
->words
= ROUND_UP (cum
->words
, align
);
3077 cum
->words
+= words
;
3083 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
3084 HOST_WIDE_INT words
)
3086 /* Otherwise, this should be passed indirect. */
3087 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
3089 cum
->words
+= words
;
3099 /* Update the data in CUM to advance over argument ARG. */
3102 ix86_function_arg_advance (cumulative_args_t cum_v
,
3103 const function_arg_info
&arg
)
3105 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3106 machine_mode mode
= arg
.mode
;
3107 HOST_WIDE_INT bytes
, words
;
3110 /* The argument of interrupt handler is a special case and is
3111 handled in ix86_function_arg. */
3112 if (!cum
->caller
&& cfun
->machine
->func_type
!= TYPE_NORMAL
)
3115 bytes
= arg
.promoted_size_in_bytes ();
3116 words
= CEIL (bytes
, UNITS_PER_WORD
);
3119 mode
= type_natural_mode (arg
.type
, NULL
, false);
3123 enum calling_abi call_abi
= cum
? cum
->call_abi
: ix86_abi
;
3125 if (call_abi
== MS_ABI
)
3126 nregs
= function_arg_advance_ms_64 (cum
, bytes
, words
);
3128 nregs
= function_arg_advance_64 (cum
, mode
, arg
.type
, words
,
3132 nregs
= function_arg_advance_32 (cum
, mode
, arg
.type
, bytes
, words
);
3136 /* Track if there are outgoing arguments on stack. */
3138 cfun
->machine
->outgoing_args_on_stack
= true;
3142 /* Define where to put the arguments to a function.
3143 Value is zero to push the argument on the stack,
3144 or a hard register in which to store the argument.
3146 MODE is the argument's machine mode.
3147 TYPE is the data type of the argument (as a tree).
3148 This is null for libcalls where that information may
3150 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3151 the preceding args and about the function being called.
3152 NAMED is nonzero if this argument is a named parameter
3153 (otherwise it is an extra parameter matching an ellipsis). */
3156 function_arg_32 (CUMULATIVE_ARGS
*cum
, machine_mode mode
,
3157 machine_mode orig_mode
, const_tree type
,
3158 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
3160 bool error_p
= false;
3162 /* Avoid the AL settings for the Unix64 ABI. */
3163 if (mode
== VOIDmode
)
3168 /* Intel MCU psABI passes scalars and aggregates no larger than 8
3169 bytes in registers. */
3170 if (!VECTOR_MODE_P (mode
) && bytes
<= 8)
3189 if (words
<= cum
->nregs
)
3191 int regno
= cum
->regno
;
3193 /* Fastcall allocates the first two DWORD (SImode) or
3194 smaller arguments to ECX and EDX if it isn't an
3200 || (type
&& AGGREGATE_TYPE_P (type
)))
3203 /* ECX not EAX is the first allocated register. */
3204 if (regno
== AX_REG
)
3207 return gen_rtx_REG (mode
, regno
);
3212 if (cum
->float_in_sse
== -1)
3214 if (cum
->float_in_sse
< 2)
3218 if (cum
->float_in_sse
== -1)
3220 if (cum
->float_in_sse
< 1)
3224 /* In 32bit, we pass TImode in xmm registers. */
3233 if (!type
|| !AGGREGATE_TYPE_P (type
))
3236 return gen_reg_or_parallel (mode
, orig_mode
,
3237 cum
->sse_regno
+ FIRST_SSE_REG
);
3243 /* OImode and XImode shouldn't be used directly. */
3262 if (!type
|| !AGGREGATE_TYPE_P (type
))
3265 return gen_reg_or_parallel (mode
, orig_mode
,
3266 cum
->sse_regno
+ FIRST_SSE_REG
);
3278 if (!type
|| !AGGREGATE_TYPE_P (type
))
3281 return gen_reg_or_parallel (mode
, orig_mode
,
3282 cum
->mmx_regno
+ FIRST_MMX_REG
);
3288 cum
->float_in_sse
= 0;
3289 error ("calling %qD with SSE calling convention without "
3290 "SSE/SSE2 enabled", cum
->decl
);
3291 sorry ("this is a GCC bug that can be worked around by adding "
3292 "attribute used to function called");
3299 function_arg_64 (const CUMULATIVE_ARGS
*cum
, machine_mode mode
,
3300 machine_mode orig_mode
, const_tree type
, bool named
)
3302 /* Handle a hidden AL argument containing number of registers
3303 for varargs x86-64 functions. */
3304 if (mode
== VOIDmode
)
3305 return GEN_INT (cum
->maybe_vaarg
3306 ? (cum
->sse_nregs
< 0
3307 ? X86_64_SSE_REGPARM_MAX
3332 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
3338 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
3340 &x86_64_int_parameter_registers
[cum
->regno
],
3345 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, machine_mode mode
,
3346 machine_mode orig_mode
, bool named
, const_tree type
,
3347 HOST_WIDE_INT bytes
)
3351 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
3352 We use value of -2 to specify that current function call is MSABI. */
3353 if (mode
== VOIDmode
)
3354 return GEN_INT (-2);
3356 /* If we've run out of registers, it goes on the stack. */
3357 if (cum
->nregs
== 0)
3360 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
3362 /* Only floating point modes are passed in anything but integer regs. */
3363 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
3367 if (type
== NULL_TREE
|| !AGGREGATE_TYPE_P (type
))
3368 regno
= cum
->regno
+ FIRST_SSE_REG
;
3374 /* Unnamed floating parameters are passed in both the
3375 SSE and integer registers. */
3376 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
3377 t2
= gen_rtx_REG (mode
, regno
);
3378 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
3379 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
3380 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
3383 /* Handle aggregated types passed in register. */
3384 if (orig_mode
== BLKmode
)
3386 if (bytes
> 0 && bytes
<= 8)
3387 mode
= (bytes
> 4 ? DImode
: SImode
);
3388 if (mode
== BLKmode
)
3392 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
3395 /* Return where to put the arguments to a function.
3396 Return zero to push the argument on the stack, or a hard register in which to store the argument.
3398 ARG describes the argument while CUM gives information about the
3399 preceding args and about the function being called. */
3402 ix86_function_arg (cumulative_args_t cum_v
, const function_arg_info
&arg
)
3404 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3405 machine_mode mode
= arg
.mode
;
3406 HOST_WIDE_INT bytes
, words
;
3409 if (!cum
->caller
&& cfun
->machine
->func_type
!= TYPE_NORMAL
)
3411 gcc_assert (arg
.type
!= NULL_TREE
);
3412 if (POINTER_TYPE_P (arg
.type
))
3414 /* This is the pointer argument. */
3415 gcc_assert (TYPE_MODE (arg
.type
) == ptr_mode
);
3416 /* It is at -WORD(AP) in the current frame in interrupt and
3417 exception handlers. */
3418 reg
= plus_constant (Pmode
, arg_pointer_rtx
, -UNITS_PER_WORD
);
3422 gcc_assert (cfun
->machine
->func_type
== TYPE_EXCEPTION
3423 && TREE_CODE (arg
.type
) == INTEGER_TYPE
3424 && TYPE_MODE (arg
.type
) == word_mode
);
3425 /* The error code is the word-mode integer argument at
3426 -2 * WORD(AP) in the current frame of the exception
3428 reg
= gen_rtx_MEM (word_mode
,
3429 plus_constant (Pmode
,
3431 -2 * UNITS_PER_WORD
));
3436 bytes
= arg
.promoted_size_in_bytes ();
3437 words
= CEIL (bytes
, UNITS_PER_WORD
);
3439 /* To simplify the code below, represent vector types with a vector mode
3440 even if MMX/SSE are not active. */
3441 if (arg
.type
&& VECTOR_TYPE_P (arg
.type
))
3442 mode
= type_natural_mode (arg
.type
, cum
, false);
3446 enum calling_abi call_abi
= cum
? cum
->call_abi
: ix86_abi
;
3448 if (call_abi
== MS_ABI
)
3449 reg
= function_arg_ms_64 (cum
, mode
, arg
.mode
, arg
.named
,
3452 reg
= function_arg_64 (cum
, mode
, arg
.mode
, arg
.type
, arg
.named
);
3455 reg
= function_arg_32 (cum
, mode
, arg
.mode
, arg
.type
, bytes
, words
);
3457 /* Track if there are outgoing arguments on stack. */
3458 if (reg
== NULL_RTX
&& cum
->caller
)
3459 cfun
->machine
->outgoing_args_on_stack
= true;
3464 /* A C expression that indicates when an argument must be passed by
3465 reference. If nonzero for an argument, a copy of that argument is
3466 made in memory and a pointer to the argument is passed instead of
3467 the argument itself. The pointer is passed in whatever way is
3468 appropriate for passing a pointer to that type. */
3471 ix86_pass_by_reference (cumulative_args_t cum_v
, const function_arg_info
&arg
)
3473 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3477 enum calling_abi call_abi
= cum
? cum
->call_abi
: ix86_abi
;
3479 /* See Windows x64 Software Convention. */
3480 if (call_abi
== MS_ABI
)
3482 HOST_WIDE_INT msize
= GET_MODE_SIZE (arg
.mode
);
3484 if (tree type
= arg
.type
)
3486 /* Arrays are passed by reference. */
3487 if (TREE_CODE (type
) == ARRAY_TYPE
)
3490 if (RECORD_OR_UNION_TYPE_P (type
))
3492 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3493 are passed by reference. */
3494 msize
= int_size_in_bytes (type
);
3498 /* __m128 is passed by reference. */
3499 return msize
!= 1 && msize
!= 2 && msize
!= 4 && msize
!= 8;
3501 else if (arg
.type
&& int_size_in_bytes (arg
.type
) == -1)
3508 /* Return true when TYPE should be 128bit aligned for 32bit argument
3509 passing ABI. XXX: This function is obsolete and is only used for
3510 checking psABI compatibility with previous versions of GCC. */
3513 ix86_compat_aligned_value_p (const_tree type
)
3515 machine_mode mode
= TYPE_MODE (type
);
3516 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
3520 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
3522 if (TYPE_ALIGN (type
) < 128)
3525 if (AGGREGATE_TYPE_P (type
))
3527 /* Walk the aggregates recursively. */
3528 switch (TREE_CODE (type
))
3532 case QUAL_UNION_TYPE
:
3536 /* Walk all the structure fields. */
3537 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
3539 if (TREE_CODE (field
) == FIELD_DECL
3540 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
3547 /* Just for use if some languages passes arrays by value. */
3548 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
3559 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3560 XXX: This function is obsolete and is only used for checking psABI
3561 compatibility with previous versions of GCC. */
3564 ix86_compat_function_arg_boundary (machine_mode mode
,
3565 const_tree type
, unsigned int align
)
3567 /* In 32bit, only _Decimal128 and __float128 are aligned to their
3568 natural boundaries. */
3569 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
3571 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3572 make an exception for SSE modes since these require 128bit
3575 The handling here differs from field_alignment. ICC aligns MMX
3576 arguments to 4 byte boundaries, while structure fields are aligned
3577 to 8 byte boundaries. */
3580 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
3581 align
= PARM_BOUNDARY
;
3585 if (!ix86_compat_aligned_value_p (type
))
3586 align
= PARM_BOUNDARY
;
3589 if (align
> BIGGEST_ALIGNMENT
)
3590 align
= BIGGEST_ALIGNMENT
;
3594 /* Return true when TYPE should be 128bit aligned for 32bit argument
3598 ix86_contains_aligned_value_p (const_tree type
)
3600 machine_mode mode
= TYPE_MODE (type
);
3602 if (mode
== XFmode
|| mode
== XCmode
)
3605 if (TYPE_ALIGN (type
) < 128)
3608 if (AGGREGATE_TYPE_P (type
))
3610 /* Walk the aggregates recursively. */
3611 switch (TREE_CODE (type
))
3615 case QUAL_UNION_TYPE
:
3619 /* Walk all the structure fields. */
3620 for (field
= TYPE_FIELDS (type
);
3622 field
= DECL_CHAIN (field
))
3624 if (TREE_CODE (field
) == FIELD_DECL
3625 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
3632 /* Just for use if some languages passes arrays by value. */
3633 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
3642 return TYPE_ALIGN (type
) >= 128;
3647 /* Gives the alignment boundary, in bits, of an argument with the
3648 specified mode and type. */
3651 ix86_function_arg_boundary (machine_mode mode
, const_tree type
)
3656 /* Since the main variant type is used for call, we convert it to
3657 the main variant type. */
3658 type
= TYPE_MAIN_VARIANT (type
);
3659 align
= TYPE_ALIGN (type
);
3660 if (TYPE_EMPTY_P (type
))
3661 return PARM_BOUNDARY
;
3664 align
= GET_MODE_ALIGNMENT (mode
);
3665 if (align
< PARM_BOUNDARY
)
3666 align
= PARM_BOUNDARY
;
3670 unsigned int saved_align
= align
;
3674 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
3677 if (mode
== XFmode
|| mode
== XCmode
)
3678 align
= PARM_BOUNDARY
;
3680 else if (!ix86_contains_aligned_value_p (type
))
3681 align
= PARM_BOUNDARY
;
3684 align
= PARM_BOUNDARY
;
3689 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
3693 inform (input_location
,
3694 "the ABI for passing parameters with %d-byte"
3695 " alignment has changed in GCC 4.6",
3696 align
/ BITS_PER_UNIT
);
3703 /* Return true if N is a possible register number of function value. */
3706 ix86_function_value_regno_p (const unsigned int regno
)
3713 return (!TARGET_64BIT
|| ix86_cfun_abi () != MS_ABI
);
3716 return TARGET_64BIT
&& ix86_cfun_abi () != MS_ABI
;
3718 /* Complex values are returned in %st(0)/%st(1) pair. */
3721 /* TODO: The function should depend on current function ABI but
3722 builtins.cc would need updating then. Therefore we use the
3724 if (TARGET_64BIT
&& ix86_cfun_abi () == MS_ABI
)
3726 return TARGET_FLOAT_RETURNS_IN_80387
;
3728 /* Complex values are returned in %xmm0/%xmm1 pair. */
3734 if (TARGET_MACHO
|| TARGET_64BIT
)
3742 /* Check whether the register REGNO should be zeroed on X86.
3743 When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
3744 together, no need to zero it again.
3745 When NEED_ZERO_MMX is true, MMX registers should be cleared. */
3748 zero_call_used_regno_p (const unsigned int regno
,
3749 bool all_sse_zeroed
,
3752 return GENERAL_REGNO_P (regno
)
3753 || (!all_sse_zeroed
&& SSE_REGNO_P (regno
))
3754 || MASK_REGNO_P (regno
)
3755 || (need_zero_mmx
&& MMX_REGNO_P (regno
));
3758 /* Return the machine_mode that is used to zero register REGNO. */
3761 zero_call_used_regno_mode (const unsigned int regno
)
3763 /* NB: We only need to zero the lower 32 bits for integer registers
3764 and the lower 128 bits for vector registers since destination are
3765 zero-extended to the full register width. */
3766 if (GENERAL_REGNO_P (regno
))
3768 else if (SSE_REGNO_P (regno
))
3770 else if (MASK_REGNO_P (regno
))
3772 else if (MMX_REGNO_P (regno
))
3778 /* Generate a rtx to zero all vector registers together if possible,
3779 otherwise, return NULL. */
3782 zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs
)
3787 for (unsigned int regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
3788 if ((LEGACY_SSE_REGNO_P (regno
)
3790 && (REX_SSE_REGNO_P (regno
)
3791 || (TARGET_AVX512F
&& EXT_REX_SSE_REGNO_P (regno
)))))
3792 && !TEST_HARD_REG_BIT (need_zeroed_hardregs
, regno
))
3795 return gen_avx_vzeroall ();
3798 /* Generate insns to zero all st registers together.
3799 Return true when zeroing instructions are generated.
3800 Assume the number of st registers that are zeroed is num_of_st,
3801 we will emit the following sequence to zero them together:
3810 i.e., num_of_st fldz followed by num_of_st fstp to clear the stack
3811 mark stack slots empty.
3813 How to compute the num_of_st:
3814 There is no direct mapping from stack registers to hard register
3815 numbers. If one stack register needs to be cleared, we don't know
3816 where in the stack the value remains. So, if any stack register
3817 needs to be cleared, the whole stack should be cleared. However,
3818 x87 stack registers that hold the return value should be excluded.
3819 x87 returns in the top (two for complex values) register, so
3820 num_of_st should be 7/6 when x87 returns, otherwise it will be 8.
3821 return the value of num_of_st. */
3825 zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs
)
3828 /* If the FPU is disabled, no need to zero all st registers. */
3829 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
3832 unsigned int num_of_st
= 0;
3833 for (unsigned int regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
3834 if ((STACK_REGNO_P (regno
) || MMX_REGNO_P (regno
))
3835 && TEST_HARD_REG_BIT (need_zeroed_hardregs
, regno
))
3844 bool return_with_x87
= false;
3845 return_with_x87
= (crtl
->return_rtx
3846 && (STACK_REG_P (crtl
->return_rtx
)));
3848 bool complex_return
= false;
3849 complex_return
= (crtl
->return_rtx
3850 && COMPLEX_MODE_P (GET_MODE (crtl
->return_rtx
)));
3852 if (return_with_x87
)
3860 rtx st_reg
= gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3861 for (unsigned int i
= 0; i
< num_of_st
; i
++)
3862 emit_insn (gen_rtx_SET (st_reg
, CONST0_RTX (XFmode
)));
3864 for (unsigned int i
= 0; i
< num_of_st
; i
++)
3867 insn
= emit_insn (gen_rtx_SET (st_reg
, st_reg
));
3868 add_reg_note (insn
, REG_DEAD
, st_reg
);
3874 /* When the routine exit in MMX mode, if any ST register needs
3875 to be zeroed, we should clear all MMX registers except the
3876 RET_MMX_REGNO that holds the return value. */
3878 zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs
,
3879 unsigned int ret_mmx_regno
)
3881 bool need_zero_all_mm
= false;
3882 for (unsigned int regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
3883 if (STACK_REGNO_P (regno
)
3884 && TEST_HARD_REG_BIT (need_zeroed_hardregs
, regno
))
3886 need_zero_all_mm
= true;
3890 if (!need_zero_all_mm
)
3893 machine_mode mode
= V2SImode
;
3894 for (unsigned int regno
= FIRST_MMX_REG
; regno
<= LAST_MMX_REG
; regno
++)
3895 if (regno
!= ret_mmx_regno
)
3897 rtx reg
= gen_rtx_REG (mode
, regno
);
3898 emit_insn (gen_rtx_SET (reg
, CONST0_RTX (mode
)));
3903 /* TARGET_ZERO_CALL_USED_REGS. */
3904 /* Generate a sequence of instructions that zero registers specified by
3905 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
3908 ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs
)
3910 HARD_REG_SET zeroed_hardregs
;
3911 bool all_sse_zeroed
= false;
3912 int all_st_zeroed_num
= 0;
3913 bool all_mm_zeroed
= false;
3915 CLEAR_HARD_REG_SET (zeroed_hardregs
);
3917 /* first, let's see whether we can zero all vector registers together. */
3918 rtx zero_all_vec_insn
= zero_all_vector_registers (need_zeroed_hardregs
);
3919 if (zero_all_vec_insn
)
3921 emit_insn (zero_all_vec_insn
);
3922 all_sse_zeroed
= true;
3925 /* mm/st registers are shared registers set, we should follow the following
3926 rules to clear them:
3927 MMX exit mode x87 exit mode
3928 -------------|----------------------|---------------
3929 uses x87 reg | clear all MMX | clear all x87
3930 uses MMX reg | clear individual MMX | clear all x87
3931 x87 + MMX | clear all MMX | clear all x87
3933 first, we should decide which mode (MMX mode or x87 mode) the function
3936 bool exit_with_mmx_mode
= (crtl
->return_rtx
3937 && (MMX_REG_P (crtl
->return_rtx
)));
3939 if (!exit_with_mmx_mode
)
3940 /* x87 exit mode, we should zero all st registers together. */
3942 all_st_zeroed_num
= zero_all_st_registers (need_zeroed_hardregs
);
3944 if (all_st_zeroed_num
> 0)
3945 for (unsigned int regno
= FIRST_STACK_REG
; regno
<= LAST_STACK_REG
; regno
++)
3946 /* x87 stack registers that hold the return value should be excluded.
3947 x87 returns in the top (two for complex values) register. */
3948 if (all_st_zeroed_num
== 8
3949 || !((all_st_zeroed_num
>= 6 && regno
== REGNO (crtl
->return_rtx
))
3950 || (all_st_zeroed_num
== 6
3951 && (regno
== (REGNO (crtl
->return_rtx
) + 1)))))
3952 SET_HARD_REG_BIT (zeroed_hardregs
, regno
);
3955 /* MMX exit mode, check whether we can zero all mm registers. */
3957 unsigned int exit_mmx_regno
= REGNO (crtl
->return_rtx
);
3958 all_mm_zeroed
= zero_all_mm_registers (need_zeroed_hardregs
,
3961 for (unsigned int regno
= FIRST_MMX_REG
; regno
<= LAST_MMX_REG
; regno
++)
3962 if (regno
!= exit_mmx_regno
)
3963 SET_HARD_REG_BIT (zeroed_hardregs
, regno
);
3966 /* Now, generate instructions to zero all the other registers. */
3968 for (unsigned int regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
3970 if (!TEST_HARD_REG_BIT (need_zeroed_hardregs
, regno
))
3972 if (!zero_call_used_regno_p (regno
, all_sse_zeroed
,
3973 exit_with_mmx_mode
&& !all_mm_zeroed
))
3976 SET_HARD_REG_BIT (zeroed_hardregs
, regno
);
3978 machine_mode mode
= zero_call_used_regno_mode (regno
);
3980 rtx reg
= gen_rtx_REG (mode
, regno
);
3981 rtx tmp
= gen_rtx_SET (reg
, CONST0_RTX (mode
));
3986 if (!TARGET_USE_MOV0
|| optimize_insn_for_size_p ())
3988 rtx clob
= gen_rtx_CLOBBER (VOIDmode
,
3989 gen_rtx_REG (CCmode
,
3991 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2,
4007 return zeroed_hardregs
;
4010 /* Define how to find the value returned by a function.
4011 VALTYPE is the data type of the value (as a tree).
4012 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4013 otherwise, FUNC is 0. */
4016 function_value_32 (machine_mode orig_mode
, machine_mode mode
,
4017 const_tree fntype
, const_tree fn
)
4021 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4022 we normally prevent this case when mmx is not available. However
4023 some ABIs may require the result to be returned like DImode. */
4024 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4025 regno
= FIRST_MMX_REG
;
4027 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4028 we prevent this case when sse is not available. However some ABIs
4029 may require the result to be returned like integer TImode. */
4030 else if (mode
== TImode
4031 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4032 regno
= FIRST_SSE_REG
;
4034 /* 32-byte vector modes in %ymm0. */
4035 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
4036 regno
= FIRST_SSE_REG
;
4038 /* 64-byte vector modes in %zmm0. */
4039 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 64)
4040 regno
= FIRST_SSE_REG
;
4042 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4043 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
4044 regno
= FIRST_FLOAT_REG
;
4046 /* Most things go in %eax. */
4049 /* Return __bf16/ _Float16/_Complex _Foat16 by sse register. */
4050 if (mode
== HFmode
|| mode
== BFmode
)
4054 error ("SSE register return with SSE2 disabled");
4058 regno
= FIRST_SSE_REG
;
4064 error ("SSE register return with SSE2 disabled");
4066 rtx ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc(1));
4068 = gen_rtx_EXPR_LIST (VOIDmode
,
4069 gen_rtx_REG (SImode
,
4070 TARGET_SSE2
? FIRST_SSE_REG
: AX_REG
),
4075 /* Override FP return register with %xmm0 for local functions when
4076 SSE math is enabled or for functions with sseregparm attribute. */
4077 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
4079 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
4080 if (sse_level
== -1)
4082 error ("calling %qD with SSE calling convention without "
4083 "SSE/SSE2 enabled", fn
);
4084 sorry ("this is a GCC bug that can be worked around by adding "
4085 "attribute used to function called");
4087 else if ((sse_level
>= 1 && mode
== SFmode
)
4088 || (sse_level
== 2 && mode
== DFmode
))
4089 regno
= FIRST_SSE_REG
;
4092 /* OImode shouldn't be used directly. */
4093 gcc_assert (mode
!= OImode
);
4095 return gen_rtx_REG (orig_mode
, regno
);
4099 function_value_64 (machine_mode orig_mode
, machine_mode mode
,
4104 /* Handle libcalls, which don't provide a type node. */
4105 if (valtype
== NULL
)
4122 regno
= FIRST_SSE_REG
;
4126 regno
= FIRST_FLOAT_REG
;
4134 return gen_rtx_REG (mode
, regno
);
4136 else if (POINTER_TYPE_P (valtype
))
4138 /* Pointers are always returned in word_mode. */
4142 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
4143 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
4144 x86_64_int_return_registers
, 0);
4146 /* For zero sized structures, construct_container returns NULL, but we
4147 need to keep rest of compiler happy by returning meaningful value. */
4149 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
4155 function_value_ms_32 (machine_mode orig_mode
, machine_mode mode
,
4156 const_tree fntype
, const_tree fn
, const_tree valtype
)
4160 /* Floating point return values in %st(0)
4161 (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
4162 if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
4163 && (GET_MODE_SIZE (mode
) > 8
4164 || valtype
== NULL_TREE
|| !AGGREGATE_TYPE_P (valtype
)))
4166 regno
= FIRST_FLOAT_REG
;
4167 return gen_rtx_REG (orig_mode
, regno
);
4170 return function_value_32(orig_mode
, mode
, fntype
,fn
);
4174 function_value_ms_64 (machine_mode orig_mode
, machine_mode mode
,
4177 unsigned int regno
= AX_REG
;
4181 switch (GET_MODE_SIZE (mode
))
4184 if (valtype
!= NULL_TREE
4185 && !VECTOR_INTEGER_TYPE_P (valtype
)
4186 && !VECTOR_INTEGER_TYPE_P (valtype
)
4187 && !INTEGRAL_TYPE_P (valtype
)
4188 && !VECTOR_FLOAT_TYPE_P (valtype
))
4190 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
4191 && !COMPLEX_MODE_P (mode
))
4192 regno
= FIRST_SSE_REG
;
4196 if (valtype
!= NULL_TREE
&& AGGREGATE_TYPE_P (valtype
))
4198 if (mode
== SFmode
|| mode
== DFmode
)
4199 regno
= FIRST_SSE_REG
;
4205 return gen_rtx_REG (orig_mode
, regno
);
4209 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
4210 machine_mode orig_mode
, machine_mode mode
)
4212 const_tree fn
, fntype
;
4215 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
4216 fn
= fntype_or_decl
;
4217 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4219 if (ix86_function_type_abi (fntype
) == MS_ABI
)
4222 return function_value_ms_64 (orig_mode
, mode
, valtype
);
4224 return function_value_ms_32 (orig_mode
, mode
, fntype
, fn
, valtype
);
4226 else if (TARGET_64BIT
)
4227 return function_value_64 (orig_mode
, mode
, valtype
);
4229 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
4233 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
, bool)
4235 machine_mode mode
, orig_mode
;
4237 orig_mode
= TYPE_MODE (valtype
);
4238 mode
= type_natural_mode (valtype
, NULL
, true);
4239 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
4242 /* Pointer function arguments and return values are promoted to
4243 word_mode for normal functions. */
4246 ix86_promote_function_mode (const_tree type
, machine_mode mode
,
4247 int *punsignedp
, const_tree fntype
,
4250 if (cfun
->machine
->func_type
== TYPE_NORMAL
4251 && type
!= NULL_TREE
4252 && POINTER_TYPE_P (type
))
4254 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
4257 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
4261 /* Return true if a structure, union or array with MODE containing FIELD
4262 should be accessed using BLKmode. */
4265 ix86_member_type_forces_blk (const_tree field
, machine_mode mode
)
4267 /* Union with XFmode must be in BLKmode. */
4268 return (mode
== XFmode
4269 && (TREE_CODE (DECL_FIELD_CONTEXT (field
)) == UNION_TYPE
4270 || TREE_CODE (DECL_FIELD_CONTEXT (field
)) == QUAL_UNION_TYPE
));
4274 ix86_libcall_value (machine_mode mode
)
4276 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
4279 /* Return true iff type is returned in memory. */
4282 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
4284 const machine_mode mode
= type_natural_mode (type
, NULL
, true);
4289 if (ix86_function_type_abi (fntype
) == MS_ABI
)
4291 size
= int_size_in_bytes (type
);
4293 /* __m128 is returned in xmm0. */
4294 if ((!type
|| VECTOR_INTEGER_TYPE_P (type
)
4295 || INTEGRAL_TYPE_P (type
)
4296 || VECTOR_FLOAT_TYPE_P (type
))
4297 && (SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
4298 && !COMPLEX_MODE_P (mode
)
4299 && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
4302 /* Otherwise, the size must be exactly in [1248]. */
4303 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
4307 int needed_intregs
, needed_sseregs
;
4309 return examine_argument (mode
, type
, 1,
4310 &needed_intregs
, &needed_sseregs
);
4315 size
= int_size_in_bytes (type
);
4317 /* Intel MCU psABI returns scalars and aggregates no larger than 8
4318 bytes in registers. */
4320 return VECTOR_MODE_P (mode
) || size
< 0 || size
> 8;
4322 if (mode
== BLKmode
)
4325 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4328 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4330 /* User-created vectors small enough to fit in EAX. */
4334 /* Unless ABI prescibes otherwise,
4335 MMX/3dNow values are returned in MM0 if available. */
4338 return TARGET_VECT8_RETURNS
|| !TARGET_MMX
;
4340 /* SSE values are returned in XMM0 if available. */
4344 /* AVX values are returned in YMM0 if available. */
4348 /* AVX512F values are returned in ZMM0 if available. */
4350 return !TARGET_AVX512F
;
4359 /* OImode shouldn't be used directly. */
4360 gcc_assert (mode
!= OImode
);
4366 /* Implement TARGET_PUSH_ARGUMENT. */
4369 ix86_push_argument (unsigned int npush
)
4371 /* If SSE2 is available, use vector move to put large argument onto
4372 stack. NB: In 32-bit mode, use 8-byte vector move. */
4373 return ((!TARGET_SSE2
|| npush
< (TARGET_64BIT
? 16 : 8))
4375 && !ACCUMULATE_OUTGOING_ARGS
);
4379 /* Create the va_list data type. */
4382 ix86_build_builtin_va_list_64 (void)
4384 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4386 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
4387 type_decl
= build_decl (BUILTINS_LOCATION
,
4388 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4390 f_gpr
= build_decl (BUILTINS_LOCATION
,
4391 FIELD_DECL
, get_identifier ("gp_offset"),
4392 unsigned_type_node
);
4393 f_fpr
= build_decl (BUILTINS_LOCATION
,
4394 FIELD_DECL
, get_identifier ("fp_offset"),
4395 unsigned_type_node
);
4396 f_ovf
= build_decl (BUILTINS_LOCATION
,
4397 FIELD_DECL
, get_identifier ("overflow_arg_area"),
4399 f_sav
= build_decl (BUILTINS_LOCATION
,
4400 FIELD_DECL
, get_identifier ("reg_save_area"),
4403 va_list_gpr_counter_field
= f_gpr
;
4404 va_list_fpr_counter_field
= f_fpr
;
4406 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4407 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4408 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4409 DECL_FIELD_CONTEXT (f_sav
) = record
;
4411 TYPE_STUB_DECL (record
) = type_decl
;
4412 TYPE_NAME (record
) = type_decl
;
4413 TYPE_FIELDS (record
) = f_gpr
;
4414 DECL_CHAIN (f_gpr
) = f_fpr
;
4415 DECL_CHAIN (f_fpr
) = f_ovf
;
4416 DECL_CHAIN (f_ovf
) = f_sav
;
4418 layout_type (record
);
4420 TYPE_ATTRIBUTES (record
) = tree_cons (get_identifier ("sysv_abi va_list"),
4421 NULL_TREE
, TYPE_ATTRIBUTES (record
));
4423 /* The correct type is an array type of one element. */
4424 return build_array_type (record
, build_index_type (size_zero_node
));
4427 /* Setup the builtin va_list data type and for 64-bit the additional
4428 calling convention specific va_list data types. */
4431 ix86_build_builtin_va_list (void)
4435 /* Initialize ABI specific va_list builtin types.
4437 In lto1, we can encounter two va_list types:
4438 - one as a result of the type-merge across TUs, and
4439 - the one constructed here.
4440 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
4441 a type identity check in canonical_va_list_type based on
4442 TYPE_MAIN_VARIANT (which we used to have) will not work.
4443 Instead, we tag each va_list_type_node with its unique attribute, and
4444 look for the attribute in the type identity check in
4445 canonical_va_list_type.
4447 Tagging sysv_va_list_type_node directly with the attribute is
4448 problematic since it's a array of one record, which will degrade into a
4449 pointer to record when used as parameter (see build_va_arg comments for
4450 an example), dropping the attribute in the process. So we tag the
4453 /* For SYSV_ABI we use an array of one record. */
4454 sysv_va_list_type_node
= ix86_build_builtin_va_list_64 ();
4456 /* For MS_ABI we use plain pointer to argument area. */
4457 tree char_ptr_type
= build_pointer_type (char_type_node
);
4458 tree attr
= tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE
,
4459 TYPE_ATTRIBUTES (char_ptr_type
));
4460 ms_va_list_type_node
= build_type_attribute_variant (char_ptr_type
, attr
);
4462 return ((ix86_abi
== MS_ABI
)
4463 ? ms_va_list_type_node
4464 : sysv_va_list_type_node
);
4468 /* For i386 we use plain pointer to argument area. */
4469 return build_pointer_type (char_type_node
);
4473 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4476 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
4482 /* GPR size of varargs save area. */
4483 if (cfun
->va_list_gpr_size
)
4484 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
4486 ix86_varargs_gpr_size
= 0;
4488 /* FPR size of varargs save area. We don't need it if we don't pass
4489 anything in SSE registers. */
4490 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
4491 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
4493 ix86_varargs_fpr_size
= 0;
4495 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
4498 save_area
= frame_pointer_rtx
;
4499 set
= get_varargs_alias_set ();
4501 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4502 if (max
> X86_64_REGPARM_MAX
)
4503 max
= X86_64_REGPARM_MAX
;
4505 for (i
= cum
->regno
; i
< max
; i
++)
4507 mem
= gen_rtx_MEM (word_mode
,
4508 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
4509 MEM_NOTRAP_P (mem
) = 1;
4510 set_mem_alias_set (mem
, set
);
4511 emit_move_insn (mem
,
4512 gen_rtx_REG (word_mode
,
4513 x86_64_int_parameter_registers
[i
]));
4516 if (ix86_varargs_fpr_size
)
4519 rtx_code_label
*label
;
4522 /* Now emit code to save SSE registers. The AX parameter contains number
4523 of SSE parameter registers used to call this function, though all we
4524 actually check here is the zero/non-zero status. */
4526 label
= gen_label_rtx ();
4527 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
4528 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
4531 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4532 we used movdqa (i.e. TImode) instead? Perhaps even better would
4533 be if we could determine the real mode of the data, via a hook
4534 into pass_stdarg. Ignore all that for now. */
4536 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
4537 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
4539 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
4540 if (max
> X86_64_SSE_REGPARM_MAX
)
4541 max
= X86_64_SSE_REGPARM_MAX
;
4543 for (i
= cum
->sse_regno
; i
< max
; ++i
)
4545 mem
= plus_constant (Pmode
, save_area
,
4546 i
* 16 + ix86_varargs_gpr_size
);
4547 mem
= gen_rtx_MEM (smode
, mem
);
4548 MEM_NOTRAP_P (mem
) = 1;
4549 set_mem_alias_set (mem
, set
);
4550 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
4552 emit_move_insn (mem
, gen_rtx_REG (smode
, GET_SSE_REGNO (i
)));
4560 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
4562 alias_set_type set
= get_varargs_alias_set ();
4565 /* Reset to zero, as there might be a sysv vaarg used
4567 ix86_varargs_gpr_size
= 0;
4568 ix86_varargs_fpr_size
= 0;
4570 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
4574 mem
= gen_rtx_MEM (Pmode
,
4575 plus_constant (Pmode
, virtual_incoming_args_rtx
,
4576 i
* UNITS_PER_WORD
));
4577 MEM_NOTRAP_P (mem
) = 1;
4578 set_mem_alias_set (mem
, set
);
4580 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
4581 emit_move_insn (mem
, reg
);
4586 ix86_setup_incoming_varargs (cumulative_args_t cum_v
,
4587 const function_arg_info
&arg
,
4590 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
4591 CUMULATIVE_ARGS next_cum
;
4594 /* This argument doesn't appear to be used anymore. Which is good,
4595 because the old code here didn't suppress rtl generation. */
4596 gcc_assert (!no_rtl
);
4601 fntype
= TREE_TYPE (current_function_decl
);
4603 /* For varargs, we do not want to skip the dummy va_dcl argument.
4604 For stdargs, we do want to skip the last named argument. */
4606 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl
))
4607 && stdarg_p (fntype
))
4608 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), arg
);
4610 if (cum
->call_abi
== MS_ABI
)
4611 setup_incoming_varargs_ms_64 (&next_cum
);
4613 setup_incoming_varargs_64 (&next_cum
);
4616 /* Checks if TYPE is of kind va_list char *. */
4619 is_va_list_char_pointer (tree type
)
4623 /* For 32-bit it is always true. */
4626 canonic
= ix86_canonical_va_list_type (type
);
4627 return (canonic
== ms_va_list_type_node
4628 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
4631 /* Implement va_start. */
4634 ix86_va_start (tree valist
, rtx nextarg
)
4636 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4637 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4638 tree gpr
, fpr
, ovf
, sav
, t
;
4642 if (flag_split_stack
4643 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
4645 unsigned int scratch_regno
;
4647 /* When we are splitting the stack, we can't refer to the stack
4648 arguments using internal_arg_pointer, because they may be on
4649 the old stack. The split stack prologue will arrange to
4650 leave a pointer to the old stack arguments in a scratch
4651 register, which we here copy to a pseudo-register. The split
4652 stack prologue can't set the pseudo-register directly because
4653 it (the prologue) runs before any registers have been saved. */
4655 scratch_regno
= split_stack_prologue_scratch_regno ();
4656 if (scratch_regno
!= INVALID_REGNUM
)
4661 reg
= gen_reg_rtx (Pmode
);
4662 cfun
->machine
->split_stack_varargs_pointer
= reg
;
4665 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
4669 push_topmost_sequence ();
4670 emit_insn_after (seq
, entry_of_function ());
4671 pop_topmost_sequence ();
4675 /* Only 64bit target needs something special. */
4676 if (is_va_list_char_pointer (TREE_TYPE (valist
)))
4678 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
4679 std_expand_builtin_va_start (valist
, nextarg
);
4684 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
4685 next
= expand_binop (ptr_mode
, add_optab
,
4686 cfun
->machine
->split_stack_varargs_pointer
,
4687 crtl
->args
.arg_offset_rtx
,
4688 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
4689 convert_move (va_r
, next
, 0);
4694 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
4695 f_fpr
= DECL_CHAIN (f_gpr
);
4696 f_ovf
= DECL_CHAIN (f_fpr
);
4697 f_sav
= DECL_CHAIN (f_ovf
);
4699 valist
= build_simple_mem_ref (valist
);
4700 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
4701 /* The following should be folded into the MEM_REF offset. */
4702 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
4704 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
4706 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
4708 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
4711 /* Count number of gp and fp argument registers used. */
4712 words
= crtl
->args
.info
.words
;
4713 n_gpr
= crtl
->args
.info
.regno
;
4714 n_fpr
= crtl
->args
.info
.sse_regno
;
4716 if (cfun
->va_list_gpr_size
)
4718 type
= TREE_TYPE (gpr
);
4719 t
= build2 (MODIFY_EXPR
, type
,
4720 gpr
, build_int_cst (type
, n_gpr
* 8));
4721 TREE_SIDE_EFFECTS (t
) = 1;
4722 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4725 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
4727 type
= TREE_TYPE (fpr
);
4728 t
= build2 (MODIFY_EXPR
, type
, fpr
,
4729 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
4730 TREE_SIDE_EFFECTS (t
) = 1;
4731 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4734 /* Find the overflow area. */
4735 type
= TREE_TYPE (ovf
);
4736 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
4737 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
4739 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
4740 t
= make_tree (type
, ovf_rtx
);
4742 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
4744 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
4745 TREE_SIDE_EFFECTS (t
) = 1;
4746 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4748 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
4750 /* Find the register save area.
4751 Prologue of the function save it right above stack frame. */
4752 type
= TREE_TYPE (sav
);
4753 t
= make_tree (type
, frame_pointer_rtx
);
4754 if (!ix86_varargs_gpr_size
)
4755 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
4757 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
4758 TREE_SIDE_EFFECTS (t
) = 1;
4759 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4763 /* Implement va_arg. */
4766 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
4769 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4770 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4771 tree gpr
, fpr
, ovf
, sav
, t
;
4773 tree lab_false
, lab_over
= NULL_TREE
;
4778 machine_mode nat_mode
;
4779 unsigned int arg_boundary
;
4780 unsigned int type_align
;
4782 /* Only 64bit target needs something special. */
4783 if (is_va_list_char_pointer (TREE_TYPE (valist
)))
4784 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4786 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
4787 f_fpr
= DECL_CHAIN (f_gpr
);
4788 f_ovf
= DECL_CHAIN (f_fpr
);
4789 f_sav
= DECL_CHAIN (f_ovf
);
4791 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
4792 valist
, f_gpr
, NULL_TREE
);
4794 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4795 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4796 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4798 indirect_p
= pass_va_arg_by_reference (type
);
4800 type
= build_pointer_type (type
);
4801 size
= arg_int_size_in_bytes (type
);
4802 rsize
= CEIL (size
, UNITS_PER_WORD
);
4804 nat_mode
= type_natural_mode (type
, NULL
, false);
4823 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
4824 if (!TARGET_64BIT_MS_ABI
)
4832 container
= construct_container (nat_mode
, TYPE_MODE (type
),
4833 type
, 0, X86_64_REGPARM_MAX
,
4834 X86_64_SSE_REGPARM_MAX
, intreg
,
4839 /* Pull the value out of the saved registers. */
4841 addr
= create_tmp_var (ptr_type_node
, "addr");
4842 type_align
= TYPE_ALIGN (type
);
4846 int needed_intregs
, needed_sseregs
;
4848 tree int_addr
, sse_addr
;
4850 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
4851 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
4853 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4855 need_temp
= (!REG_P (container
)
4856 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4857 || TYPE_ALIGN (type
) > 128));
4859 /* In case we are passing structure, verify that it is consecutive block
4860 on the register save area. If not we need to do moves. */
4861 if (!need_temp
&& !REG_P (container
))
4863 /* Verify that all registers are strictly consecutive */
4864 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4868 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4870 rtx slot
= XVECEXP (container
, 0, i
);
4871 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4872 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4880 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4882 rtx slot
= XVECEXP (container
, 0, i
);
4883 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4884 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4896 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4897 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4900 /* First ensure that we fit completely in registers. */
4903 t
= build_int_cst (TREE_TYPE (gpr
),
4904 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
4905 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4906 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4907 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4908 gimplify_and_add (t
, pre_p
);
4912 t
= build_int_cst (TREE_TYPE (fpr
),
4913 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4914 + X86_64_REGPARM_MAX
* 8);
4915 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4916 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4917 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4918 gimplify_and_add (t
, pre_p
);
4921 /* Compute index to start of area used for integer regs. */
4924 /* int_addr = gpr + sav; */
4925 t
= fold_build_pointer_plus (sav
, gpr
);
4926 gimplify_assign (int_addr
, t
, pre_p
);
4930 /* sse_addr = fpr + sav; */
4931 t
= fold_build_pointer_plus (sav
, fpr
);
4932 gimplify_assign (sse_addr
, t
, pre_p
);
4936 int i
, prev_size
= 0;
4937 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4938 TREE_ADDRESSABLE (temp
) = 1;
4941 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4942 gimplify_assign (addr
, t
, pre_p
);
4944 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4946 rtx slot
= XVECEXP (container
, 0, i
);
4947 rtx reg
= XEXP (slot
, 0);
4948 machine_mode mode
= GET_MODE (reg
);
4954 tree dest_addr
, dest
;
4955 int cur_size
= GET_MODE_SIZE (mode
);
4957 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
4958 prev_size
= INTVAL (XEXP (slot
, 1));
4959 if (prev_size
+ cur_size
> size
)
4961 cur_size
= size
- prev_size
;
4962 unsigned int nbits
= cur_size
* BITS_PER_UNIT
;
4963 if (!int_mode_for_size (nbits
, 1).exists (&mode
))
4966 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4967 if (mode
== GET_MODE (reg
))
4968 addr_type
= build_pointer_type (piece_type
);
4970 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
4972 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
4975 if (SSE_REGNO_P (REGNO (reg
)))
4977 src_addr
= sse_addr
;
4978 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4982 src_addr
= int_addr
;
4983 src_offset
= REGNO (reg
) * 8;
4985 src_addr
= fold_convert (addr_type
, src_addr
);
4986 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
4988 dest_addr
= fold_convert (daddr_type
, addr
);
4989 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
4990 if (cur_size
== GET_MODE_SIZE (mode
))
4992 src
= build_va_arg_indirect_ref (src_addr
);
4993 dest
= build_va_arg_indirect_ref (dest_addr
);
4995 gimplify_assign (dest
, src
, pre_p
);
5000 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
5001 3, dest_addr
, src_addr
,
5002 size_int (cur_size
));
5003 gimplify_and_add (copy
, pre_p
);
5005 prev_size
+= cur_size
;
5011 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
5012 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
5013 gimplify_assign (gpr
, t
, pre_p
);
5014 /* The GPR save area guarantees only 8-byte alignment. */
5016 type_align
= MIN (type_align
, 64);
5021 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
5022 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
5023 gimplify_assign (unshare_expr (fpr
), t
, pre_p
);
5026 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
5028 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
5031 /* ... otherwise out of the overflow area. */
5033 /* When we align parameter on stack for caller, if the parameter
5034 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
5035 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
5036 here with caller. */
5037 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
5038 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
5039 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
5041 /* Care for on-stack alignment if needed. */
5042 if (arg_boundary
<= 64 || size
== 0)
5046 HOST_WIDE_INT align
= arg_boundary
/ 8;
5047 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
5048 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
5049 build_int_cst (TREE_TYPE (t
), -align
));
5052 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
5053 gimplify_assign (addr
, t
, pre_p
);
5055 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
5056 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
5059 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
5061 type
= build_aligned_type (type
, type_align
);
5062 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
5063 addr
= fold_convert (ptrtype
, addr
);
5066 addr
= build_va_arg_indirect_ref (addr
);
5067 return build_va_arg_indirect_ref (addr
);
5070 /* Return true if OPNUM's MEM should be matched
5071 in movabs* patterns. */
5074 ix86_check_movabs (rtx insn
, int opnum
)
5078 set
= PATTERN (insn
);
5079 if (GET_CODE (set
) == PARALLEL
)
5080 set
= XVECEXP (set
, 0, 0);
5081 gcc_assert (GET_CODE (set
) == SET
);
5082 mem
= XEXP (set
, opnum
);
5083 while (SUBREG_P (mem
))
5084 mem
= SUBREG_REG (mem
);
5085 gcc_assert (MEM_P (mem
));
5086 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
5089 /* Return false if INSN contains a MEM with a non-default address space. */
5091 ix86_check_no_addr_space (rtx insn
)
5093 subrtx_var_iterator::array_type array
;
5094 FOR_EACH_SUBRTX_VAR (iter
, array
, PATTERN (insn
), ALL
)
5097 if (MEM_P (x
) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x
)))
5103 /* Initialize the table of extra 80387 mathematical constants. */
5106 init_ext_80387_constants (void)
5108 static const char * cst
[5] =
5110 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5111 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5112 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5113 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5114 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5118 for (i
= 0; i
< 5; i
++)
5120 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
5121 /* Ensure each constant is rounded to XFmode precision. */
5122 real_convert (&ext_80387_constants_table
[i
],
5123 XFmode
, &ext_80387_constants_table
[i
]);
5126 ext_80387_constants_init
= 1;
5129 /* Return non-zero if the constant is something that
5130 can be loaded with a special instruction. */
5133 standard_80387_constant_p (rtx x
)
5135 machine_mode mode
= GET_MODE (x
);
5137 const REAL_VALUE_TYPE
*r
;
5139 if (!(CONST_DOUBLE_P (x
) && X87_FLOAT_MODE_P (mode
)))
5142 if (x
== CONST0_RTX (mode
))
5144 if (x
== CONST1_RTX (mode
))
5147 r
= CONST_DOUBLE_REAL_VALUE (x
);
5149 /* For XFmode constants, try to find a special 80387 instruction when
5150 optimizing for size or on those CPUs that benefit from them. */
5152 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
)
5153 && !flag_rounding_math
)
5157 if (! ext_80387_constants_init
)
5158 init_ext_80387_constants ();
5160 for (i
= 0; i
< 5; i
++)
5161 if (real_identical (r
, &ext_80387_constants_table
[i
]))
5165 /* Load of the constant -0.0 or -1.0 will be split as
5166 fldz;fchs or fld1;fchs sequence. */
5167 if (real_isnegzero (r
))
5169 if (real_identical (r
, &dconstm1
))
5175 /* Return the opcode of the special instruction to be used to load
5179 standard_80387_constant_opcode (rtx x
)
5181 switch (standard_80387_constant_p (x
))
5205 /* Return the CONST_DOUBLE representing the 80387 constant that is
5206 loaded by the specified special instruction. The argument IDX
5207 matches the return value from standard_80387_constant_p. */
5210 standard_80387_constant_rtx (int idx
)
5214 if (! ext_80387_constants_init
)
5215 init_ext_80387_constants ();
5231 return const_double_from_real_value (ext_80387_constants_table
[i
],
5235 /* Return 1 if X is all bits 0, 2 if X is all bits 1
5236 and 3 if X is all bits 1 with zero extend
5237 in supported SSE/AVX vector mode. */
5240 standard_sse_constant_p (rtx x
, machine_mode pred_mode
)
5247 mode
= GET_MODE (x
);
5249 if (x
== const0_rtx
|| const0_operand (x
, mode
))
5252 if (x
== constm1_rtx
5253 || vector_all_ones_operand (x
, mode
)
5254 || ((GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
5255 || GET_MODE_CLASS (pred_mode
) == MODE_VECTOR_FLOAT
)
5256 && float_vector_all_ones_operand (x
, mode
)))
5258 /* VOIDmode integer constant, get mode from the predicate. */
5259 if (mode
== VOIDmode
)
5262 switch (GET_MODE_SIZE (mode
))
5284 if (vector_all_ones_zero_extend_half_operand (x
, mode
)
5285 || vector_all_ones_zero_extend_quarter_operand (x
, mode
))
5291 /* Return the opcode of the special instruction to be used to load
5292 the constant operands[1] into operands[0]. */
5295 standard_sse_constant_opcode (rtx_insn
*insn
, rtx
*operands
)
5298 rtx x
= operands
[1];
5300 gcc_assert (TARGET_SSE
);
5302 mode
= GET_MODE (x
);
5304 if (x
== const0_rtx
|| const0_operand (x
, mode
))
5306 switch (get_attr_mode (insn
))
5309 if (!EXT_REX_SSE_REG_P (operands
[0]))
5310 return "%vpxor\t%0, %d0";
5314 if (EXT_REX_SSE_REG_P (operands
[0]))
5315 return (TARGET_AVX512VL
5316 ? "vpxord\t%x0, %x0, %x0"
5317 : "vpxord\t%g0, %g0, %g0");
5318 return "vpxor\t%x0, %x0, %x0";
5321 if (!EXT_REX_SSE_REG_P (operands
[0]))
5322 return "%vxorpd\t%0, %d0";
5326 if (!EXT_REX_SSE_REG_P (operands
[0]))
5327 return "vxorpd\t%x0, %x0, %x0";
5328 else if (TARGET_AVX512DQ
)
5329 return (TARGET_AVX512VL
5330 ? "vxorpd\t%x0, %x0, %x0"
5331 : "vxorpd\t%g0, %g0, %g0");
5333 return (TARGET_AVX512VL
5334 ? "vpxorq\t%x0, %x0, %x0"
5335 : "vpxorq\t%g0, %g0, %g0");
5338 if (!EXT_REX_SSE_REG_P (operands
[0]))
5339 return "%vxorps\t%0, %d0";
5343 if (!EXT_REX_SSE_REG_P (operands
[0]))
5344 return "vxorps\t%x0, %x0, %x0";
5345 else if (TARGET_AVX512DQ
)
5346 return (TARGET_AVX512VL
5347 ? "vxorps\t%x0, %x0, %x0"
5348 : "vxorps\t%g0, %g0, %g0");
5350 return (TARGET_AVX512VL
5351 ? "vpxord\t%x0, %x0, %x0"
5352 : "vpxord\t%g0, %g0, %g0");
5358 else if (x
== constm1_rtx
5359 || vector_all_ones_operand (x
, mode
)
5360 || (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
5361 && float_vector_all_ones_operand (x
, mode
)))
5363 enum attr_mode insn_mode
= get_attr_mode (insn
);
5370 gcc_assert (TARGET_AVX512F
);
5371 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5376 gcc_assert (TARGET_AVX2
);
5381 gcc_assert (TARGET_SSE2
);
5382 if (!EXT_REX_SSE_REG_P (operands
[0]))
5384 ? "vpcmpeqd\t%0, %0, %0"
5385 : "pcmpeqd\t%0, %0");
5386 else if (TARGET_AVX512VL
)
5387 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
5389 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5395 else if (vector_all_ones_zero_extend_half_operand (x
, mode
))
5397 if (GET_MODE_SIZE (mode
) == 64)
5399 gcc_assert (TARGET_AVX512F
);
5400 return "vpcmpeqd\t%t0, %t0, %t0";
5402 else if (GET_MODE_SIZE (mode
) == 32)
5404 gcc_assert (TARGET_AVX
);
5405 return "vpcmpeqd\t%x0, %x0, %x0";
5409 else if (vector_all_ones_zero_extend_quarter_operand (x
, mode
))
5411 gcc_assert (TARGET_AVX512F
);
5412 return "vpcmpeqd\t%x0, %x0, %x0";
5418 /* Returns true if INSN can be transformed from a memory load
5419 to a supported FP constant load. */
5422 ix86_standard_x87sse_constant_load_p (const rtx_insn
*insn
, rtx dst
)
5424 rtx src
= find_constant_src (insn
);
5426 gcc_assert (REG_P (dst
));
5429 || (SSE_REGNO_P (REGNO (dst
))
5430 && standard_sse_constant_p (src
, GET_MODE (dst
)) != 1)
5431 || (STACK_REGNO_P (REGNO (dst
))
5432 && standard_80387_constant_p (src
) < 1))
5438 /* Predicate for pre-reload splitters with associated instructions,
5439 which can match any time before the split1 pass (usually combine),
5440 then are unconditionally split in that pass and should not be
5441 matched again afterwards. */
5444 ix86_pre_reload_split (void)
5446 return (can_create_pseudo_p ()
5447 && !(cfun
->curr_properties
& PROP_rtl_split_insns
));
5450 /* Return the opcode of the TYPE_SSEMOV instruction. To move from
5451 or to xmm16-xmm31/ymm16-ymm31 registers, we either require
5452 TARGET_AVX512VL or it is a register to register move which can
5453 be done with zmm register move. */
5456 ix86_get_ssemov (rtx
*operands
, unsigned size
,
5457 enum attr_mode insn_mode
, machine_mode mode
)
5460 bool misaligned_p
= (misaligned_operand (operands
[0], mode
)
5461 || misaligned_operand (operands
[1], mode
));
5462 bool evex_reg_p
= (size
== 64
5463 || EXT_REX_SSE_REG_P (operands
[0])
5464 || EXT_REX_SSE_REG_P (operands
[1]));
5465 machine_mode scalar_mode
;
5467 const char *opcode
= NULL
;
5473 } type
= opcode_int
;
5480 scalar_mode
= E_SFmode
;
5481 type
= opcode_float
;
5486 scalar_mode
= E_DFmode
;
5487 type
= opcode_double
;
5492 scalar_mode
= GET_MODE_INNER (mode
);
5498 /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
5499 we can only use zmm register move without memory operand. */
5502 && GET_MODE_SIZE (mode
) < 64)
5504 /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
5505 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
5506 AVX512VL is disabled, LRA can still generate reg to
5507 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
5509 if (memory_operand (operands
[0], mode
)
5510 || memory_operand (operands
[1], mode
))
5516 if (scalar_mode
== E_HFmode
|| scalar_mode
== E_BFmode
)
5517 opcode
= (misaligned_p
5518 ? (TARGET_AVX512BW
? "vmovdqu16" : "vmovdqu64")
5521 opcode
= misaligned_p
? "vmovdqu32" : "vmovdqa32";
5524 opcode
= misaligned_p
? "vmovups" : "vmovaps";
5527 opcode
= misaligned_p
? "vmovupd" : "vmovapd";
5531 else if (SCALAR_FLOAT_MODE_P (scalar_mode
))
5533 switch (scalar_mode
)
5538 opcode
= (misaligned_p
5544 opcode
= (misaligned_p
5551 opcode
= misaligned_p
? "%vmovups" : "%vmovaps";
5554 opcode
= misaligned_p
? "%vmovupd" : "%vmovapd";
5558 opcode
= misaligned_p
? "vmovdqu64" : "vmovdqa64";
5560 opcode
= misaligned_p
? "%vmovdqu" : "%vmovdqa";
5566 else if (SCALAR_INT_MODE_P (scalar_mode
))
5568 switch (scalar_mode
)
5572 opcode
= (misaligned_p
5578 opcode
= (misaligned_p
5586 opcode
= (misaligned_p
5592 opcode
= (misaligned_p
5600 opcode
= misaligned_p
? "vmovdqu32" : "vmovdqa32";
5602 opcode
= misaligned_p
? "%vmovdqu" : "%vmovdqa";
5608 opcode
= misaligned_p
? "vmovdqu64" : "vmovdqa64";
5610 opcode
= misaligned_p
? "%vmovdqu" : "%vmovdqa";
5613 opcode
= misaligned_p
? "vmovdqu64" : "vmovdqa64";
5625 snprintf (buf
, sizeof (buf
), "%s\t{%%g1, %%g0|%%g0, %%g1}",
5629 snprintf (buf
, sizeof (buf
), "%s\t{%%t1, %%t0|%%t0, %%t1}",
5633 snprintf (buf
, sizeof (buf
), "%s\t{%%x1, %%x0|%%x0, %%x1}",
5639 output_asm_insn (buf
, operands
);
5643 /* Return the template of the TYPE_SSEMOV instruction to move
5644 operands[1] into operands[0]. */
5647 ix86_output_ssemov (rtx_insn
*insn
, rtx
*operands
)
5649 machine_mode mode
= GET_MODE (operands
[0]);
5650 if (get_attr_type (insn
) != TYPE_SSEMOV
5651 || mode
!= GET_MODE (operands
[1]))
5654 enum attr_mode insn_mode
= get_attr_mode (insn
);
5661 return ix86_get_ssemov (operands
, 64, insn_mode
, mode
);
5666 return ix86_get_ssemov (operands
, 32, insn_mode
, mode
);
5671 return ix86_get_ssemov (operands
, 16, insn_mode
, mode
);
5674 /* Handle broken assemblers that require movd instead of movq. */
5675 if (GENERAL_REG_P (operands
[0]))
5677 if (HAVE_AS_IX86_INTERUNIT_MOVQ
)
5678 return "%vmovq\t{%1, %q0|%q0, %1}";
5680 return "%vmovd\t{%1, %q0|%q0, %1}";
5682 else if (GENERAL_REG_P (operands
[1]))
5684 if (HAVE_AS_IX86_INTERUNIT_MOVQ
)
5685 return "%vmovq\t{%q1, %0|%0, %q1}";
5687 return "%vmovd\t{%q1, %0|%0, %q1}";
5690 return "%vmovq\t{%1, %0|%0, %1}";
5693 if (GENERAL_REG_P (operands
[0]))
5694 return "%vmovd\t{%1, %k0|%k0, %1}";
5695 else if (GENERAL_REG_P (operands
[1]))
5696 return "%vmovd\t{%k1, %0|%0, %k1}";
5698 return "%vmovd\t{%1, %0|%0, %1}";
5701 if (GENERAL_REG_P (operands
[0]))
5702 return "vmovw\t{%1, %k0|%k0, %1}";
5703 else if (GENERAL_REG_P (operands
[1]))
5704 return "vmovw\t{%k1, %0|%0, %k1}";
5706 return "vmovw\t{%1, %0|%0, %1}";
5709 if (TARGET_AVX
&& REG_P (operands
[0]) && REG_P (operands
[1]))
5710 return "vmovsd\t{%d1, %0|%0, %d1}";
5712 return "%vmovsd\t{%1, %0|%0, %1}";
5715 if (TARGET_AVX
&& REG_P (operands
[0]) && REG_P (operands
[1]))
5716 return "vmovss\t{%d1, %0|%0, %d1}";
5718 return "%vmovss\t{%1, %0|%0, %1}";
5722 if (REG_P (operands
[0]) && REG_P (operands
[1]))
5723 return "vmovsh\t{%d1, %0|%0, %d1}";
5725 return "vmovsh\t{%1, %0|%0, %1}";
5728 gcc_assert (!TARGET_AVX
);
5729 return "movlpd\t{%1, %0|%0, %1}";
5732 if (TARGET_AVX
&& REG_P (operands
[0]))
5733 return "vmovlps\t{%1, %d0|%d0, %1}";
5735 return "%vmovlps\t{%1, %0|%0, %1}";
5742 /* Returns true if OP contains a symbol reference */
5745 symbolic_reference_mentioned_p (rtx op
)
5750 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
5753 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
5754 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
5760 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
5761 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
5765 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
5772 /* Return true if it is appropriate to emit `ret' instructions in the
5773 body of a function. Do this only if the epilogue is simple, needing a
5774 couple of insns. Prior to reloading, we can't tell how many registers
5775 must be saved, so return false then. Return false if there is no frame
5776 marker to de-allocate. */
5779 ix86_can_use_return_insn_p (void)
5781 if (ix86_function_ms_hook_prologue (current_function_decl
))
5784 if (ix86_function_naked (current_function_decl
))
5787 /* Don't use `ret' instruction in interrupt handler. */
5788 if (! reload_completed
5789 || frame_pointer_needed
5790 || cfun
->machine
->func_type
!= TYPE_NORMAL
)
5793 /* Don't allow more than 32k pop, since that's all we can do
5794 with one instruction. */
5795 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
5798 struct ix86_frame
&frame
= cfun
->machine
->frame
;
5799 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
5800 && (frame
.nregs
+ frame
.nsseregs
) == 0);
5803 /* Return stack frame size. get_frame_size () returns used stack slots
5804 during compilation, which may be optimized out later. If stack frame
5805 is needed, stack_frame_required should be true. */
5807 static HOST_WIDE_INT
5808 ix86_get_frame_size (void)
5810 if (cfun
->machine
->stack_frame_required
)
5811 return get_frame_size ();
5816 /* Value should be nonzero if functions must have frame pointers.
5817 Zero means the frame pointer need not be set up (and parms may
5818 be accessed via the stack pointer) in functions that seem suitable. */
5821 ix86_frame_pointer_required (void)
5823 /* If we accessed previous frames, then the generated code expects
5824 to be able to access the saved ebp value in our frame. */
5825 if (cfun
->machine
->accesses_prev_frame
)
5828 /* Several x86 os'es need a frame pointer for other reasons,
5829 usually pertaining to setjmp. */
5830 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5833 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
5834 if (TARGET_32BIT_MS_ABI
&& cfun
->calls_setjmp
)
5837 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
5838 allocation is 4GB. */
5839 if (TARGET_64BIT_MS_ABI
&& ix86_get_frame_size () > SEH_MAX_FRAME_SIZE
)
5842 /* SSE saves require frame-pointer when stack is misaligned. */
5843 if (TARGET_64BIT_MS_ABI
&& ix86_incoming_stack_boundary
< 128)
5846 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
5847 turns off the frame pointer by default. Turn it back on now if
5848 we've not got a leaf function. */
5849 if (TARGET_OMIT_LEAF_FRAME_POINTER
5851 || ix86_current_function_calls_tls_descriptor
))
5854 /* Several versions of mcount for the x86 assumes that there is a
5855 frame, so we cannot allow profiling without a frame pointer. */
5856 if (crtl
->profile
&& !flag_fentry
)
5862 /* Record that the current function accesses previous call frames. */
5865 ix86_setup_frame_addresses (void)
5867 cfun
->machine
->accesses_prev_frame
= 1;
5870 #ifndef USE_HIDDEN_LINKONCE
5871 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
5872 # define USE_HIDDEN_LINKONCE 1
5874 # define USE_HIDDEN_LINKONCE 0
5878 /* Label count for call and return thunks. It is used to make unique
5879 labels in call and return thunks. */
5880 static int indirectlabelno
;
5882 /* True if call thunk function is needed. */
5883 static bool indirect_thunk_needed
= false;
5885 /* Bit masks of integer registers, which contain branch target, used
5886 by call thunk functions. */
5887 static HARD_REG_SET indirect_thunks_used
;
5889 /* True if return thunk function is needed. */
5890 static bool indirect_return_needed
= false;
5892 /* True if return thunk function via CX is needed. */
5893 static bool indirect_return_via_cx
;
5895 #ifndef INDIRECT_LABEL
5896 # define INDIRECT_LABEL "LIND"
5899 /* Indicate what prefix is needed for an indirect branch. */
5900 enum indirect_thunk_prefix
5902 indirect_thunk_prefix_none
,
5903 indirect_thunk_prefix_nt
5906 /* Return the prefix needed for an indirect branch INSN. */
5908 enum indirect_thunk_prefix
5909 indirect_thunk_need_prefix (rtx_insn
*insn
)
5911 enum indirect_thunk_prefix need_prefix
;
5912 if ((cfun
->machine
->indirect_branch_type
5913 == indirect_branch_thunk_extern
)
5914 && ix86_notrack_prefixed_insn_p (insn
))
5916 /* NOTRACK prefix is only used with external thunk so that it
5917 can be properly updated to support CET at run-time. */
5918 need_prefix
= indirect_thunk_prefix_nt
;
5921 need_prefix
= indirect_thunk_prefix_none
;
5925 /* Fills in the label name that should be used for the indirect thunk. */
5928 indirect_thunk_name (char name
[32], unsigned int regno
,
5929 enum indirect_thunk_prefix need_prefix
,
5932 if (regno
!= INVALID_REGNUM
&& regno
!= CX_REG
&& ret_p
)
5935 if (USE_HIDDEN_LINKONCE
)
5939 if (need_prefix
== indirect_thunk_prefix_nt
5940 && regno
!= INVALID_REGNUM
)
5942 /* NOTRACK prefix is only used with external thunk via
5943 register so that NOTRACK prefix can be added to indirect
5944 branch via register to support CET at run-time. */
5950 const char *ret
= ret_p
? "return" : "indirect";
5952 if (regno
!= INVALID_REGNUM
)
5954 const char *reg_prefix
;
5955 if (LEGACY_INT_REGNO_P (regno
))
5956 reg_prefix
= TARGET_64BIT
? "r" : "e";
5959 sprintf (name
, "__x86_%s_thunk%s_%s%s",
5960 ret
, prefix
, reg_prefix
, reg_names
[regno
]);
5963 sprintf (name
, "__x86_%s_thunk%s", ret
, prefix
);
5967 if (regno
!= INVALID_REGNUM
)
5968 ASM_GENERATE_INTERNAL_LABEL (name
, "LITR", regno
);
5972 ASM_GENERATE_INTERNAL_LABEL (name
, "LRT", 0);
5974 ASM_GENERATE_INTERNAL_LABEL (name
, "LIT", 0);
5979 /* Output a call and return thunk for indirect branch. If REGNO != -1,
5980 the function address is in REGNO and the call and return thunk looks like:
5991 Otherwise, the function address is on the top of stack and the
5992 call and return thunk looks like:
6000 lea WORD_SIZE(%sp), %sp
6005 output_indirect_thunk (unsigned int regno
)
6007 char indirectlabel1
[32];
6008 char indirectlabel2
[32];
6010 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1
, INDIRECT_LABEL
,
6012 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2
, INDIRECT_LABEL
,
6016 fputs ("\tcall\t", asm_out_file
);
6017 assemble_name_raw (asm_out_file
, indirectlabel2
);
6018 fputc ('\n', asm_out_file
);
6020 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel1
);
6022 /* AMD and Intel CPUs prefer each a different instruction as loop filler.
6023 Usage of both pause + lfence is compromise solution. */
6024 fprintf (asm_out_file
, "\tpause\n\tlfence\n");
6027 fputs ("\tjmp\t", asm_out_file
);
6028 assemble_name_raw (asm_out_file
, indirectlabel1
);
6029 fputc ('\n', asm_out_file
);
6031 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel2
);
6033 /* The above call insn pushed a word to stack. Adjust CFI info. */
6034 if (flag_asynchronous_unwind_tables
&& dwarf2out_do_frame ())
6036 if (! dwarf2out_do_cfi_asm ())
6038 dw_cfi_ref xcfi
= ggc_cleared_alloc
<dw_cfi_node
> ();
6039 xcfi
->dw_cfi_opc
= DW_CFA_advance_loc4
;
6040 xcfi
->dw_cfi_oprnd1
.dw_cfi_addr
= ggc_strdup (indirectlabel2
);
6041 vec_safe_push (cfun
->fde
->dw_fde_cfi
, xcfi
);
6043 dw_cfi_ref xcfi
= ggc_cleared_alloc
<dw_cfi_node
> ();
6044 xcfi
->dw_cfi_opc
= DW_CFA_def_cfa_offset
;
6045 xcfi
->dw_cfi_oprnd1
.dw_cfi_offset
= 2 * UNITS_PER_WORD
;
6046 vec_safe_push (cfun
->fde
->dw_fde_cfi
, xcfi
);
6047 dwarf2out_emit_cfi (xcfi
);
6050 if (regno
!= INVALID_REGNUM
)
6054 xops
[0] = gen_rtx_MEM (word_mode
, stack_pointer_rtx
);
6055 xops
[1] = gen_rtx_REG (word_mode
, regno
);
6056 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops
);
6062 xops
[0] = stack_pointer_rtx
;
6063 xops
[1] = plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
6064 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops
);
6067 fputs ("\tret\n", asm_out_file
);
6068 if ((ix86_harden_sls
& harden_sls_return
))
6069 fputs ("\tint3\n", asm_out_file
);
6072 /* Output a funtion with a call and return thunk for indirect branch.
6073 If REGNO != INVALID_REGNUM, the function address is in REGNO.
6074 Otherwise, the function address is on the top of stack. Thunk is
6075 used for function return if RET_P is true. */
6078 output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix
,
6079 unsigned int regno
, bool ret_p
)
6084 /* Create __x86_indirect_thunk. */
6085 indirect_thunk_name (name
, regno
, need_prefix
, ret_p
);
6086 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
6087 get_identifier (name
),
6088 build_function_type_list (void_type_node
, NULL_TREE
));
6089 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
6090 NULL_TREE
, void_type_node
);
6091 TREE_PUBLIC (decl
) = 1;
6092 TREE_STATIC (decl
) = 1;
6093 DECL_IGNORED_P (decl
) = 1;
6098 switch_to_section (darwin_sections
[picbase_thunk_section
]);
6099 fputs ("\t.weak_definition\t", asm_out_file
);
6100 assemble_name (asm_out_file
, name
);
6101 fputs ("\n\t.private_extern\t", asm_out_file
);
6102 assemble_name (asm_out_file
, name
);
6103 putc ('\n', asm_out_file
);
6104 ASM_OUTPUT_LABEL (asm_out_file
, name
);
6105 DECL_WEAK (decl
) = 1;
6109 if (USE_HIDDEN_LINKONCE
)
6111 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
6113 targetm
.asm_out
.unique_section (decl
, 0);
6114 switch_to_section (get_named_section (decl
, NULL
, 0));
6116 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
6117 fputs ("\t.hidden\t", asm_out_file
);
6118 assemble_name (asm_out_file
, name
);
6119 putc ('\n', asm_out_file
);
6120 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
6124 switch_to_section (text_section
);
6125 ASM_OUTPUT_LABEL (asm_out_file
, name
);
6128 DECL_INITIAL (decl
) = make_node (BLOCK
);
6129 current_function_decl
= decl
;
6130 allocate_struct_function (decl
, false);
6131 init_function_start (decl
);
6132 /* We're about to hide the function body from callees of final_* by
6133 emitting it directly; tell them we're a thunk, if they care. */
6134 cfun
->is_thunk
= true;
6135 first_function_block_is_cold
= false;
6136 /* Make sure unwind info is emitted for the thunk if needed. */
6137 final_start_function (emit_barrier (), asm_out_file
, 1);
6139 output_indirect_thunk (regno
);
6141 final_end_function ();
6142 init_insn_lengths ();
6143 free_after_compilation (cfun
);
6145 current_function_decl
= NULL
;
6148 static int pic_labels_used
;
6150 /* Fills in the label name that should be used for a pc thunk for
6151 the given register. */
6154 get_pc_thunk_name (char name
[32], unsigned int regno
)
6156 gcc_assert (!TARGET_64BIT
);
6158 if (USE_HIDDEN_LINKONCE
)
6159 sprintf (name
, "__x86.get_pc_thunk.%s", reg_names
[regno
]);
6161 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
6165 /* This function generates code for -fpic that loads %ebx with
6166 the return address of the caller and then returns. */
6169 ix86_code_end (void)
6174 if (indirect_return_needed
)
6175 output_indirect_thunk_function (indirect_thunk_prefix_none
,
6176 INVALID_REGNUM
, true);
6177 if (indirect_return_via_cx
)
6178 output_indirect_thunk_function (indirect_thunk_prefix_none
,
6180 if (indirect_thunk_needed
)
6181 output_indirect_thunk_function (indirect_thunk_prefix_none
,
6182 INVALID_REGNUM
, false);
6184 for (regno
= FIRST_REX_INT_REG
; regno
<= LAST_REX_INT_REG
; regno
++)
6186 if (TEST_HARD_REG_BIT (indirect_thunks_used
, regno
))
6187 output_indirect_thunk_function (indirect_thunk_prefix_none
,
6191 for (regno
= FIRST_INT_REG
; regno
<= LAST_INT_REG
; regno
++)
6196 if (TEST_HARD_REG_BIT (indirect_thunks_used
, regno
))
6197 output_indirect_thunk_function (indirect_thunk_prefix_none
,
6200 if (!(pic_labels_used
& (1 << regno
)))
6203 get_pc_thunk_name (name
, regno
);
6205 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
6206 get_identifier (name
),
6207 build_function_type_list (void_type_node
, NULL_TREE
));
6208 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
6209 NULL_TREE
, void_type_node
);
6210 TREE_PUBLIC (decl
) = 1;
6211 TREE_STATIC (decl
) = 1;
6212 DECL_IGNORED_P (decl
) = 1;
6217 switch_to_section (darwin_sections
[picbase_thunk_section
]);
6218 fputs ("\t.weak_definition\t", asm_out_file
);
6219 assemble_name (asm_out_file
, name
);
6220 fputs ("\n\t.private_extern\t", asm_out_file
);
6221 assemble_name (asm_out_file
, name
);
6222 putc ('\n', asm_out_file
);
6223 ASM_OUTPUT_LABEL (asm_out_file
, name
);
6224 DECL_WEAK (decl
) = 1;
6228 if (USE_HIDDEN_LINKONCE
)
6230 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
6232 targetm
.asm_out
.unique_section (decl
, 0);
6233 switch_to_section (get_named_section (decl
, NULL
, 0));
6235 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
6236 fputs ("\t.hidden\t", asm_out_file
);
6237 assemble_name (asm_out_file
, name
);
6238 putc ('\n', asm_out_file
);
6239 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
6243 switch_to_section (text_section
);
6244 ASM_OUTPUT_LABEL (asm_out_file
, name
);
6247 DECL_INITIAL (decl
) = make_node (BLOCK
);
6248 current_function_decl
= decl
;
6249 allocate_struct_function (decl
, false);
6250 init_function_start (decl
);
6251 /* We're about to hide the function body from callees of final_* by
6252 emitting it directly; tell them we're a thunk, if they care. */
6253 cfun
->is_thunk
= true;
6254 first_function_block_is_cold
= false;
6255 /* Make sure unwind info is emitted for the thunk if needed. */
6256 final_start_function (emit_barrier (), asm_out_file
, 1);
6258 /* Pad stack IP move with 4 instructions (two NOPs count
6259 as one instruction). */
6260 if (TARGET_PAD_SHORT_FUNCTION
)
6265 fputs ("\tnop\n", asm_out_file
);
6268 xops
[0] = gen_rtx_REG (Pmode
, regno
);
6269 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
6270 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
6271 fputs ("\tret\n", asm_out_file
);
6272 final_end_function ();
6273 init_insn_lengths ();
6274 free_after_compilation (cfun
);
6276 current_function_decl
= NULL
;
6279 if (flag_split_stack
)
6280 file_end_indicate_split_stack ();
6283 /* Emit code for the SET_GOT patterns. */
6286 output_set_got (rtx dest
, rtx label
)
6292 if (TARGET_VXWORKS_RTP
&& flag_pic
)
6294 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
6295 xops
[2] = gen_rtx_MEM (Pmode
,
6296 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
6297 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
6299 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6300 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6301 an unadorned address. */
6302 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
6303 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
6304 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
6308 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
6313 get_pc_thunk_name (name
, REGNO (dest
));
6314 pic_labels_used
|= 1 << REGNO (dest
);
6316 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
6317 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
6318 output_asm_insn ("%!call\t%X2", xops
);
6321 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
6322 This is what will be referenced by the Mach-O PIC subsystem. */
6323 if (machopic_should_output_picbase_label () || !label
)
6324 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
6326 /* When we are restoring the pic base at the site of a nonlocal label,
6327 and we decided to emit the pic base above, we will still output a
6328 local label used for calculating the correction offset (even though
6329 the offset will be 0 in that case). */
6331 targetm
.asm_out
.internal_label (asm_out_file
, "L",
6332 CODE_LABEL_NUMBER (label
));
6338 /* We don't need a pic base, we're not producing pic. */
6341 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
6342 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
6343 targetm
.asm_out
.internal_label (asm_out_file
, "L",
6344 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
6348 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
6353 /* Generate an "push" pattern for input ARG. */
6358 struct machine_function
*m
= cfun
->machine
;
6360 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
6361 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
6362 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
6364 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
6365 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
6367 return gen_rtx_SET (gen_rtx_MEM (word_mode
,
6368 gen_rtx_PRE_DEC (Pmode
,
6369 stack_pointer_rtx
)),
6373 /* Generate an "pop" pattern for input ARG. */
6378 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
6379 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
6381 return gen_rtx_SET (arg
,
6382 gen_rtx_MEM (word_mode
,
6383 gen_rtx_POST_INC (Pmode
,
6384 stack_pointer_rtx
)));
6387 /* Return >= 0 if there is an unused call-clobbered register available
6388 for the entire function. */
6391 ix86_select_alt_pic_regnum (void)
6393 if (ix86_use_pseudo_pic_reg ())
6394 return INVALID_REGNUM
;
6398 && !ix86_current_function_calls_tls_descriptor
)
6401 /* Can't use the same register for both PIC and DRAP. */
6403 drap
= REGNO (crtl
->drap_reg
);
6406 for (i
= 2; i
>= 0; --i
)
6407 if (i
!= drap
&& !df_regs_ever_live_p (i
))
6411 return INVALID_REGNUM
;
6414 /* Return true if REGNO is used by the epilogue. */
6417 ix86_epilogue_uses (int regno
)
6419 /* If there are no caller-saved registers, we preserve all registers,
6420 except for MMX and x87 registers which aren't supported when saving
6421 and restoring registers. Don't explicitly save SP register since
6422 it is always preserved. */
6423 return (epilogue_completed
6424 && cfun
->machine
->no_caller_saved_registers
6425 && !fixed_regs
[regno
]
6426 && !STACK_REGNO_P (regno
)
6427 && !MMX_REGNO_P (regno
));
6430 /* Return nonzero if register REGNO can be used as a scratch register
6434 ix86_hard_regno_scratch_ok (unsigned int regno
)
6436 /* If there are no caller-saved registers, we can't use any register
6437 as a scratch register after epilogue and use REGNO as scratch
6438 register only if it has been used before to avoid saving and
6440 return (!cfun
->machine
->no_caller_saved_registers
6441 || (!epilogue_completed
6442 && df_regs_ever_live_p (regno
)));
6445 /* Return TRUE if we need to save REGNO. */
6448 ix86_save_reg (unsigned int regno
, bool maybe_eh_return
, bool ignore_outlined
)
6450 /* If there are no caller-saved registers, we preserve all registers,
6451 except for MMX and x87 registers which aren't supported when saving
6452 and restoring registers. Don't explicitly save SP register since
6453 it is always preserved. */
6454 if (cfun
->machine
->no_caller_saved_registers
)
6456 /* Don't preserve registers used for function return value. */
6457 rtx reg
= crtl
->return_rtx
;
6460 unsigned int i
= REGNO (reg
);
6461 unsigned int nregs
= REG_NREGS (reg
);
6463 if ((i
+ nregs
) == regno
)
6467 return (df_regs_ever_live_p (regno
)
6468 && !fixed_regs
[regno
]
6469 && !STACK_REGNO_P (regno
)
6470 && !MMX_REGNO_P (regno
)
6471 && (regno
!= HARD_FRAME_POINTER_REGNUM
6472 || !frame_pointer_needed
));
6475 if (regno
== REAL_PIC_OFFSET_TABLE_REGNUM
6476 && pic_offset_table_rtx
)
6478 if (ix86_use_pseudo_pic_reg ())
6480 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
6481 _mcount in prologue. */
6482 if (!TARGET_64BIT
&& flag_pic
&& crtl
->profile
)
6485 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
6487 || crtl
->calls_eh_return
6488 || crtl
->uses_const_pool
6489 || cfun
->has_nonlocal_label
)
6490 return ix86_select_alt_pic_regnum () == INVALID_REGNUM
;
6493 if (crtl
->calls_eh_return
&& maybe_eh_return
)
6498 unsigned test
= EH_RETURN_DATA_REGNO (i
);
6499 if (test
== INVALID_REGNUM
)
6506 if (ignore_outlined
&& cfun
->machine
->call_ms2sysv
)
6508 unsigned count
= cfun
->machine
->call_ms2sysv_extra_regs
6509 + xlogue_layout::MIN_REGS
;
6510 if (xlogue_layout::is_stub_managed_reg (regno
, count
))
6515 && regno
== REGNO (crtl
->drap_reg
)
6516 && !cfun
->machine
->no_drap_save_restore
)
6519 return (df_regs_ever_live_p (regno
)
6520 && !call_used_or_fixed_reg_p (regno
)
6521 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
6524 /* Return number of saved general prupose registers. */
6527 ix86_nsaved_regs (void)
6532 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6533 if (GENERAL_REGNO_P (regno
) && ix86_save_reg (regno
, true, true))
6538 /* Return number of saved SSE registers. */
6541 ix86_nsaved_sseregs (void)
6546 if (!TARGET_64BIT_MS_ABI
)
6548 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6549 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true, true))
6554 /* Given FROM and TO register numbers, say whether this elimination is
6555 allowed. If stack alignment is needed, we can only replace argument
6556 pointer with hard frame pointer, or replace frame pointer with stack
6557 pointer. Otherwise, frame pointer elimination is automatically
6558 handled and all other eliminations are valid. */
6561 ix86_can_eliminate (const int from
, const int to
)
6563 if (stack_realign_fp
)
6564 return ((from
== ARG_POINTER_REGNUM
6565 && to
== HARD_FRAME_POINTER_REGNUM
)
6566 || (from
== FRAME_POINTER_REGNUM
6567 && to
== STACK_POINTER_REGNUM
));
6569 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
6572 /* Return the offset between two registers, one to be eliminated, and the other
6573 its replacement, at the start of a routine.  All offsets are read from
   the frame layout previously computed into cfun->machine->frame.  */
6576 ix86_initial_elimination_offset (int from
, int to
)
6578 struct ix86_frame
&frame
= cfun
->machine
->frame
;
6580 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
6581 return frame
.hard_frame_pointer_offset
;
6582 else if (from
== FRAME_POINTER_REGNUM
6583 && to
== HARD_FRAME_POINTER_REGNUM
)
6584 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
/* Any remaining elimination must target the stack pointer.  */
6587 gcc_assert (to
== STACK_POINTER_REGNUM
);
6589 if (from
== ARG_POINTER_REGNUM
)
6590 return frame
.stack_pointer_offset
;
6592 gcc_assert (from
== FRAME_POINTER_REGNUM
);
6593 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
6597 /* Emits a warning for unsupported msabi to sysv pro/epilogues.
   FEATURE names the incompatible feature and is substituted into the
   diagnostic.  NOTE(review): the static flag presumably guards the
   warning so it fires only once per compilation — the guarding `if'
   is elided in this extraction; confirm against the full source.  */
6599 warn_once_call_ms2sysv_xlogues (const char *feature
)
6601 static bool warned_once
= false;
6604 warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6610 /* Return the probing interval for -fstack-clash-protection.  Uses the
   --param stack-clash-protection-probe-interval value (a power-of-two
   exponent) when stack-clash protection is on, otherwise the generic
   STACK_CHECK_PROBE_INTERVAL_EXP exponent.  */
6612 static HOST_WIDE_INT
6613 get_probe_interval (void)
6615 if (flag_stack_clash_protection
)
6616 return (HOST_WIDE_INT_1U
6617 << param_stack_clash_protection_probe_interval
);
6619 return (HOST_WIDE_INT_1U
<< STACK_CHECK_PROBE_INTERVAL_EXP
);
6622 /* When using -fsplit-stack, the allocation routines set a field in
6623 the TCB to the bottom of the stack plus this much space, measured
6626 #define SPLIT_STACK_AVAILABLE 256
6628 /* Fill structure ix86_frame about frame of currently computed function. */
6631 ix86_compute_frame_layout (void)
6633 struct ix86_frame
*frame
= &cfun
->machine
->frame
;
6634 struct machine_function
*m
= cfun
->machine
;
6635 unsigned HOST_WIDE_INT stack_alignment_needed
;
6636 HOST_WIDE_INT offset
;
6637 unsigned HOST_WIDE_INT preferred_alignment
;
6638 HOST_WIDE_INT size
= ix86_get_frame_size ();
6639 HOST_WIDE_INT to_allocate
;
6641 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
6642 * ms_abi functions that call a sysv function. We now need to prune away
6643 * cases where it should be disabled. */
6644 if (TARGET_64BIT
&& m
->call_ms2sysv
)
6646 gcc_assert (TARGET_64BIT_MS_ABI
);
6647 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES
);
6648 gcc_assert (!TARGET_SEH
);
6649 gcc_assert (TARGET_SSE
);
6650 gcc_assert (!ix86_using_red_zone ());
6652 if (crtl
->calls_eh_return
)
6654 gcc_assert (!reload_completed
);
6655 m
->call_ms2sysv
= false;
6656 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
6659 else if (ix86_static_chain_on_stack
)
6661 gcc_assert (!reload_completed
);
6662 m
->call_ms2sysv
= false;
6663 warn_once_call_ms2sysv_xlogues ("static call chains");
6666 /* Finally, compute which registers the stub will manage. */
6669 unsigned count
= xlogue_layout::count_stub_managed_regs ();
6670 m
->call_ms2sysv_extra_regs
= count
- xlogue_layout::MIN_REGS
;
6671 m
->call_ms2sysv_pad_in
= 0;
6675 frame
->nregs
= ix86_nsaved_regs ();
6676 frame
->nsseregs
= ix86_nsaved_sseregs ();
6678 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
6679 except for function prologues, leaf functions and when the default
6680 incoming stack boundary is overridden at the command line or via the
6681 force_align_arg_pointer attribute.
6683 Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
6684 at call sites, including profile function calls.
6686 if (((TARGET_64BIT_MS_ABI
|| TARGET_MACHO
)
6687 && crtl
->preferred_stack_boundary
< 128)
6688 && (!crtl
->is_leaf
|| cfun
->calls_alloca
!= 0
6689 || ix86_current_function_calls_tls_descriptor
6690 || (TARGET_MACHO
&& crtl
->profile
)
6691 || ix86_incoming_stack_boundary
< 128))
6693 crtl
->preferred_stack_boundary
= 128;
6694 crtl
->stack_alignment_needed
= 128;
6697 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
6698 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
6700 gcc_assert (!size
|| stack_alignment_needed
);
6701 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
6702 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
6704 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
6705 gcc_assert (TARGET_64BIT
|| !frame
->nsseregs
);
6706 if (TARGET_64BIT
&& m
->call_ms2sysv
)
6708 gcc_assert (stack_alignment_needed
>= 16);
6709 gcc_assert (!frame
->nsseregs
);
6712 /* For SEH we have to limit the amount of code movement into the prologue.
6713 At present we do this via a BLOCKAGE, at which point there's very little
6714 scheduling that can be done, which means that there's very little point
6715 in doing anything except PUSHs. */
6717 m
->use_fast_prologue_epilogue
= false;
6718 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun
)))
6720 int count
= frame
->nregs
;
6721 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
6723 /* The fast prologue uses move instead of push to save registers. This
6724 is significantly longer, but also executes faster as modern hardware
6725 can execute the moves in parallel, but can't do that for push/pop.
6727 Be careful about choosing what prologue to emit: When function takes
6728 many instructions to execute we may use slow version as well as in
6729 case function is known to be outside hot spot (this is known with
6730 feedback only). Weight the size of function by number of registers
6731 to save as it is cheap to use one or two push instructions but very
6732 slow to use many of them.
6734 Calling this hook multiple times with the same frame requirements
6735 must produce the same layout, since the RA might otherwise be
6736 unable to reach a fixed point or might fail its final sanity checks.
6737 This means that once we've assumed that a function does or doesn't
6738 have a particular size, we have to stick to that assumption
6739 regardless of how the function has changed since. */
6741 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
6742 if (node
->frequency
< NODE_FREQUENCY_NORMAL
6743 || (flag_branch_probabilities
6744 && node
->frequency
< NODE_FREQUENCY_HOT
))
6745 m
->use_fast_prologue_epilogue
= false;
6748 if (count
!= frame
->expensive_count
)
6750 frame
->expensive_count
= count
;
6751 frame
->expensive_p
= expensive_function_p (count
);
6753 m
->use_fast_prologue_epilogue
= !frame
->expensive_p
;
6757 frame
->save_regs_using_mov
6758 = TARGET_PROLOGUE_USING_MOVE
&& m
->use_fast_prologue_epilogue
;
6760 /* Skip return address and error code in exception handler. */
6761 offset
= INCOMING_FRAME_SP_OFFSET
;
6763 /* Skip pushed static chain. */
6764 if (ix86_static_chain_on_stack
)
6765 offset
+= UNITS_PER_WORD
;
6767 /* Skip saved base pointer. */
6768 if (frame_pointer_needed
)
6769 offset
+= UNITS_PER_WORD
;
6770 frame
->hfp_save_offset
= offset
;
6772 /* The traditional frame pointer location is at the top of the frame. */
6773 frame
->hard_frame_pointer_offset
= offset
;
6775 /* Register save area */
6776 offset
+= frame
->nregs
* UNITS_PER_WORD
;
6777 frame
->reg_save_offset
= offset
;
6779 /* Calculate the size of the va-arg area (not including padding, if any). */
6780 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
6782 /* Also adjust stack_realign_offset for the largest alignment of
6783 stack slot actually used. */
6784 if (stack_realign_fp
6785 || (cfun
->machine
->max_used_stack_alignment
!= 0
6786 && (offset
% cfun
->machine
->max_used_stack_alignment
) != 0))
6788 /* We may need a 16-byte aligned stack for the remainder of the
6789 register save area, but the stack frame for the local function
6790 may require a greater alignment if using AVX/2/512. In order
6791 to avoid wasting space, we first calculate the space needed for
6792 the rest of the register saves, add that to the stack pointer,
6793 and then realign the stack to the boundary of the start of the
6794 frame for the local function. */
6795 HOST_WIDE_INT space_needed
= 0;
6796 HOST_WIDE_INT sse_reg_space_needed
= 0;
6800 if (m
->call_ms2sysv
)
6802 m
->call_ms2sysv_pad_in
= 0;
6803 space_needed
= xlogue_layout::get_instance ().get_stack_space_used ();
6806 else if (frame
->nsseregs
)
6807 /* The only ABI that has saved SSE registers (Win64) also has a
6808 16-byte aligned default stack. However, many programs violate
6809 the ABI, and Wine64 forces stack realignment to compensate. */
6810 space_needed
= frame
->nsseregs
* 16;
6812 sse_reg_space_needed
= space_needed
= ROUND_UP (space_needed
, 16);
6814 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
6815 rounding to be pedantic. */
6816 space_needed
= ROUND_UP (space_needed
+ frame
->va_arg_size
, 16);
6819 space_needed
= frame
->va_arg_size
;
6821 /* Record the allocation size required prior to the realignment AND. */
6822 frame
->stack_realign_allocate
= space_needed
;
6824 /* The re-aligned stack starts at frame->stack_realign_offset. Values
6825 before this point are not directly comparable with values below
6826 this point. Use sp_valid_at to determine if the stack pointer is
6827 valid for a given offset, fp_valid_at for the frame pointer, or
6828 choose_baseaddr to have a base register chosen for you.
6830 Note that the result of (frame->stack_realign_offset
6831 & (stack_alignment_needed - 1)) may not equal zero. */
6832 offset
= ROUND_UP (offset
+ space_needed
, stack_alignment_needed
);
6833 frame
->stack_realign_offset
= offset
- space_needed
;
6834 frame
->sse_reg_save_offset
= frame
->stack_realign_offset
6835 + sse_reg_space_needed
;
6839 frame
->stack_realign_offset
= offset
;
6841 if (TARGET_64BIT
&& m
->call_ms2sysv
)
6843 m
->call_ms2sysv_pad_in
= !!(offset
& UNITS_PER_WORD
);
6844 offset
+= xlogue_layout::get_instance ().get_stack_space_used ();
6847 /* Align and set SSE register save area. */
6848 else if (frame
->nsseregs
)
6850 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
6851 required and the DRAP re-alignment boundary is at least 16 bytes,
6852 then we want the SSE register save area properly aligned. */
6853 if (ix86_incoming_stack_boundary
>= 128
6854 || (stack_realign_drap
&& stack_alignment_needed
>= 16))
6855 offset
= ROUND_UP (offset
, 16);
6856 offset
+= frame
->nsseregs
* 16;
6858 frame
->sse_reg_save_offset
= offset
;
6859 offset
+= frame
->va_arg_size
;
6862 /* Align start of frame for local function. When a function call
6863 is removed, it may become a leaf function. But if argument may
6864 be passed on stack, we need to align the stack when there is no
6867 || frame
->va_arg_size
!= 0
6870 || (!crtl
->tail_call_emit
6871 && cfun
->machine
->outgoing_args_on_stack
)
6872 || cfun
->calls_alloca
6873 || ix86_current_function_calls_tls_descriptor
)
6874 offset
= ROUND_UP (offset
, stack_alignment_needed
);
6876 /* Frame pointer points here. */
6877 frame
->frame_pointer_offset
= offset
;
6881 /* Add outgoing arguments area. Can be skipped if we eliminated
6882 all the function calls as dead code.
6883 Skipping is however impossible when function calls alloca. Alloca
6884 expander assumes that last crtl->outgoing_args_size
6885 of stack frame are unused. */
6886 if (ACCUMULATE_OUTGOING_ARGS
6887 && (!crtl
->is_leaf
|| cfun
->calls_alloca
6888 || ix86_current_function_calls_tls_descriptor
))
6890 offset
+= crtl
->outgoing_args_size
;
6891 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
6894 frame
->outgoing_arguments_size
= 0;
6896 /* Align stack boundary. Only needed if we're calling another function
6898 if (!crtl
->is_leaf
|| cfun
->calls_alloca
6899 || ix86_current_function_calls_tls_descriptor
)
6900 offset
= ROUND_UP (offset
, preferred_alignment
);
6902 /* We've reached end of stack frame. */
6903 frame
->stack_pointer_offset
= offset
;
6905 /* Size prologue needs to allocate. */
6906 to_allocate
= offset
- frame
->sse_reg_save_offset
;
6908 if ((!to_allocate
&& frame
->nregs
<= 1)
6909 || (TARGET_64BIT
&& to_allocate
>= HOST_WIDE_INT_C (0x80000000))
6910 /* If static stack checking is enabled and done with probes,
6911 the registers need to be saved before allocating the frame. */
6912 || flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
6913 /* If stack clash probing needs a loop, then it needs a
6914 scratch register. But the returned register is only guaranteed
6915 to be safe to use after register saves are complete. So if
6916 stack clash protections are enabled and the allocated frame is
6917 larger than the probe interval, then use pushes to save
6918 callee saved registers. */
6919 || (flag_stack_clash_protection
6920 && !ix86_target_stack_probe ()
6921 && to_allocate
> get_probe_interval ()))
6922 frame
->save_regs_using_mov
= false;
6924 if (ix86_using_red_zone ()
6925 && crtl
->sp_is_unchanging
6927 && !ix86_pc_thunk_call_expanded
6928 && !ix86_current_function_calls_tls_descriptor
)
6930 frame
->red_zone_size
= to_allocate
;
6931 if (frame
->save_regs_using_mov
)
6932 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
6933 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
6934 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
6937 frame
->red_zone_size
= 0;
6938 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
6940 /* The SEH frame pointer location is near the bottom of the frame.
6941 This is enforced by the fact that the difference between the
6942 stack pointer and the frame pointer is limited to 240 bytes in
6943 the unwind data structure. */
6946 /* Force the frame pointer to point at or below the lowest register save
6947 area, see the SEH code in config/i386/winnt.cc for the rationale. */
6948 frame
->hard_frame_pointer_offset
= frame
->sse_reg_save_offset
;
6950 /* If we can leave the frame pointer where it is, do so; however return
6951 the establisher frame for __builtin_frame_address (0) or else if the
6952 frame overflows the SEH maximum frame size.
6954 Note that the value returned by __builtin_frame_address (0) is quite
6955 constrained, because setjmp is piggybacked on the SEH machinery with
6956 recent versions of MinGW:
6958 # elif defined(__SEH__)
6959 # if defined(__aarch64__) || defined(_ARM64_)
6960 # define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
6961 # elif (__MINGW_GCC_VERSION < 40702)
6962 # define setjmp(BUF) _setjmp((BUF), mingw_getsp())
6964 # define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
6967 and the second argument passed to _setjmp, if not null, is forwarded
6968 to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
6969 built an ExceptionRecord on the fly describing the setjmp buffer). */
6970 const HOST_WIDE_INT diff
6971 = frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
6972 if (diff
<= 255 && !crtl
->accesses_prior_frames
)
6974 /* The resulting diff will be a multiple of 16 lower than 255,
6975 i.e. at most 240 as required by the unwind data structure. */
6976 frame
->hard_frame_pointer_offset
+= (diff
& 15);
6978 else if (diff
<= SEH_MAX_FRAME_SIZE
&& !crtl
->accesses_prior_frames
)
6980 /* Ideally we'd determine what portion of the local stack frame
6981 (within the constraint of the lowest 240) is most heavily used.
6982 But without that complication, simply bias the frame pointer
6983 by 128 bytes so as to maximize the amount of the local stack
6984 frame that is addressable with 8-bit offsets. */
6985 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
6988 frame
->hard_frame_pointer_offset
= frame
->hfp_save_offset
;
6992 /* This is semi-inlined memory_address_length, but simplified
6993 since we know that we're always dealing with reg+offset, and
6994 to avoid having to create and discard all that rtl.  Returns the
   number of extra encoding bytes (displacement and/or SIB) needed to
   address OFFSET(REGNO).  NOTE(review): the final accumulation and
   return are elided in this extraction.  */
6997 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
7003 /* EBP and R13 cannot be encoded without an offset.  */
7004 len
= (regno
== BP_REG
|| regno
== R13_REG
);
/* An 8-bit signed displacement covers [-128, 127]; anything else needs
   a full 32-bit displacement (handled in the elided else branch).  */
7006 else if (IN_RANGE (offset
, -128, 127))
7009 /* ESP and R12 must be encoded with a SIB byte.  */
7010 if (regno
== SP_REG
|| regno
== R12_REG
)
7016 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
7017 the frame save area.  The register is saved at CFA - CFA_OFFSET.
   When the stack pointer has been realigned, offsets at or above the
   realignment point are not SP-addressable.  */
7020 sp_valid_at (HOST_WIDE_INT cfa_offset
)
7022 const struct machine_frame_state
&fs
= cfun
->machine
->fs
;
7023 if (fs
.sp_realigned
&& cfa_offset
<= fs
.sp_realigned_offset
)
7025 /* Validate that the cfa_offset isn't in a "no-man's land".  */
7026 gcc_assert (cfa_offset
<= fs
.sp_realigned_fp_last
);
7032 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
7033 the frame save area.  The register is saved at CFA - CFA_OFFSET.
   Mirror image of sp_valid_at: below the realignment boundary the
   frame pointer cannot be used.  */
7036 fp_valid_at (HOST_WIDE_INT cfa_offset
)
7038 const struct machine_frame_state
&fs
= cfun
->machine
->fs
;
7039 if (fs
.sp_realigned
&& cfa_offset
> fs
.sp_realigned_fp_last
)
7041 /* Validate that the cfa_offset isn't in a "no-man's land".  */
7042 gcc_assert (cfa_offset
>= fs
.sp_realigned_offset
);
7048 /* Choose a base register based upon alignment requested, speed and/or
   code size, writing the chosen register and its offset into BASE_REG
   and BASE_OFFSET.  ALIGN_REQESTED filters out candidates whose known
   alignment is too small; *ALIGN receives the alignment of the chosen
   register.  NOTE(review): several guarding conditions and braces are
   elided in this extraction; the visible logic matches the comments.  */
7052 choose_basereg (HOST_WIDE_INT cfa_offset
, rtx
&base_reg
,
7053 HOST_WIDE_INT
&base_offset
,
7054 unsigned int align_reqested
, unsigned int *align
)
7056 const struct machine_function
*m
= cfun
->machine
;
7057 unsigned int hfp_align
;
7058 unsigned int drap_align
;
7059 unsigned int sp_align
;
7060 bool hfp_ok
= fp_valid_at (cfa_offset
);
7061 bool drap_ok
= m
->fs
.drap_valid
;
7062 bool sp_ok
= sp_valid_at (cfa_offset
);
7064 hfp_align
= drap_align
= sp_align
= INCOMING_STACK_BOUNDARY
;
7066 /* Filter out any registers that don't meet the requested alignment
   requirement.  */
7070 if (m
->fs
.realigned
)
7071 hfp_align
= drap_align
= sp_align
= crtl
->stack_alignment_needed
;
7072 /* SEH unwind code does not currently support REG_CFA_EXPRESSION
7073 notes (which we would need to use a realigned stack pointer),
7074 so disable on SEH targets.  */
7075 else if (m
->fs
.sp_realigned
)
7076 sp_align
= crtl
->stack_alignment_needed
;
7078 hfp_ok
= hfp_ok
&& hfp_align
>= align_reqested
;
7079 drap_ok
= drap_ok
&& drap_align
>= align_reqested
;
7080 sp_ok
= sp_ok
&& sp_align
>= align_reqested
;
7083 if (m
->use_fast_prologue_epilogue
)
7085 /* Choose the base register most likely to allow the most scheduling
7086 opportunities.  Generally FP is valid throughout the function,
7087 while DRAP must be reloaded within the epilogue.  But choose either
7088 over the SP due to increased encoding size.  */
7092 base_reg
= hard_frame_pointer_rtx
;
7093 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
7097 base_reg
= crtl
->drap_reg
;
/* The DRAP register itself is the CFA, hence offset 0 - cfa_offset.  */
7098 base_offset
= 0 - cfa_offset
;
7102 base_reg
= stack_pointer_rtx
;
7103 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
7108 HOST_WIDE_INT toffset
;
7111 /* Choose the base register with the smallest address encoding.
7112 With a tie, choose FP > DRAP > SP.  */
7115 base_reg
= stack_pointer_rtx
;
7116 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
7117 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
7121 toffset
= 0 - cfa_offset
;
7122 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
7125 base_reg
= crtl
->drap_reg
;
7126 base_offset
= toffset
;
7132 toffset
= m
->fs
.fp_offset
- cfa_offset
;
7133 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
7136 base_reg
= hard_frame_pointer_rtx
;
7137 base_offset
= toffset
;
7142 /* Set the align return value to match whichever register won.  */
7145 if (base_reg
== stack_pointer_rtx
)
7147 else if (base_reg
== crtl
->drap_reg
)
7148 *align
= drap_align
;
7149 else if (base_reg
== hard_frame_pointer_rtx
)
7154 /* Return an RTX that points to CFA_OFFSET within the stack frame and
7155 the alignment of address.  If ALIGN is non-null, it should point to
7156 an alignment value (in bits) that is preferred or zero and will
7157 receive the alignment of the base register that was selected,
7158 irrespective of whether or not CFA_OFFSET is a multiple of that
7159 alignment value.  If it is possible for the base register offset to be
7160 non-immediate then SCRATCH_REGNO should specify a scratch register to
   use for loading the offset.
7163 The valid base registers are taken from CFUN->MACHINE->FS.  */
7166 choose_baseaddr (HOST_WIDE_INT cfa_offset
, unsigned int *align
,
7167 unsigned int scratch_regno
= INVALID_REGNUM
)
7169 rtx base_reg
= NULL
;
7170 HOST_WIDE_INT base_offset
= 0;
7172 /* If a specific alignment is requested, try to get a base register
7173 with that alignment first.  */
7174 if (align
&& *align
)
7175 choose_basereg (cfa_offset
, base_reg
, base_offset
, *align
, align
);
/* Fall back to any valid base register (alignment 0 = no constraint).  */
7178 choose_basereg (cfa_offset
, base_reg
, base_offset
, 0, align
);
7180 gcc_assert (base_reg
!= NULL
);
7182 rtx base_offset_rtx
= GEN_INT (base_offset
);
/* Offsets too large for an x86-64 immediate must be materialized in
   the caller-supplied scratch register.  */
7184 if (!x86_64_immediate_operand (base_offset_rtx
, Pmode
))
7186 gcc_assert (scratch_regno
!= INVALID_REGNUM
);
7188 rtx scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
7189 emit_move_insn (scratch_reg
, base_offset_rtx
);
7191 return gen_rtx_PLUS (Pmode
, base_reg
, scratch_reg
);
7194 return plus_constant (Pmode
, base_reg
, base_offset
);
7197 /* Emit code to save registers in the prologue.  Uses PUSH instructions,
   iterating from the highest register number downward, and marks each
   push as frame-related for the unwinder.  */
7200 ix86_emit_save_regs (void)
7205 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
7206 if (GENERAL_REGNO_P (regno
) && ix86_save_reg (regno
, true, true))
7208 insn
= emit_insn (gen_push (gen_rtx_REG (word_mode
, regno
)));
7209 RTX_FRAME_RELATED_P (insn
) = 1;
7213 /* Emit a single register save at CFA - CFA_OFFSET, using a MOV to a
   frame slot rather than a PUSH, and attach whatever DWARF CFA note
   the unwinder needs for that slot.  */
7216 ix86_emit_save_reg_using_mov (machine_mode mode
, unsigned int regno
,
7217 HOST_WIDE_INT cfa_offset
)
7219 struct machine_function
*m
= cfun
->machine
;
7220 rtx reg
= gen_rtx_REG (mode
, regno
);
7221 rtx mem
, addr
, base
, insn
;
7222 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
7224 addr
= choose_baseaddr (cfa_offset
, &align
);
7225 mem
= gen_frame_mem (mode
, addr
);
7227 /* The location alignment depends upon the base register.  */
7228 align
= MIN (GET_MODE_ALIGNMENT (mode
), align
);
7229 gcc_assert (! (cfa_offset
& (align
/ BITS_PER_UNIT
- 1)));
7230 set_mem_align (mem
, align
);
7232 insn
= emit_insn (gen_rtx_SET (mem
, reg
));
7233 RTX_FRAME_RELATED_P (insn
) = 1;
/* Strip a reg+offset address down to the base register.  */
7236 if (GET_CODE (base
) == PLUS
)
7237 base
= XEXP (base
, 0);
7238 gcc_checking_assert (REG_P (base
));
7240 /* When saving registers into a re-aligned local stack frame, avoid
7241 any tricky guessing by dwarf2out.  */
7242 if (m
->fs
.realigned
)
7244 gcc_checking_assert (stack_realign_drap
);
7246 if (regno
== REGNO (crtl
->drap_reg
))
7248 /* A bit of a hack.  We force the DRAP register to be saved in
7249 the re-aligned stack frame, which provides us with a copy
7250 of the CFA that will last past the prologue.  Install it.  */
7251 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
7252 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
7253 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
7254 mem
= gen_rtx_MEM (mode
, addr
);
7255 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
7259 /* The frame pointer is a stable reference within the
7260 aligned frame.  Use it.  */
7261 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
7262 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
7263 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
7264 mem
= gen_rtx_MEM (mode
, addr
);
7265 add_reg_note (insn
, REG_CFA_EXPRESSION
, gen_rtx_SET (mem
, reg
));
/* Saves below a realigned stack pointer also need an explicit
   CFA expression note.  */
7269 else if (base
== stack_pointer_rtx
&& m
->fs
.sp_realigned
7270 && cfa_offset
>= m
->fs
.sp_realigned_offset
)
7272 gcc_checking_assert (stack_realign_fp
);
7273 add_reg_note (insn
, REG_CFA_EXPRESSION
, gen_rtx_SET (mem
, reg
));
7276 /* The memory may not be relative to the current CFA register,
7277 which means that we may need to generate a new pattern for
7278 use by the unwind info.  */
7279 else if (base
!= m
->fs
.cfa_reg
)
7281 addr
= plus_constant (Pmode
, m
->fs
.cfa_reg
,
7282 m
->fs
.cfa_offset
- cfa_offset
);
7283 mem
= gen_rtx_MEM (mode
, addr
);
7284 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (mem
, reg
));
7288 /* Emit code to save registers using MOV insns.
7289 First register is stored at CFA - CFA_OFFSET; each subsequent save
   moves one word closer to the CFA.  */
7291 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
7295 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
7296 if (GENERAL_REGNO_P (regno
) && ix86_save_reg (regno
, true, true))
7298 ix86_emit_save_reg_using_mov (word_mode
, regno
, cfa_offset
);
7299 cfa_offset
-= UNITS_PER_WORD
;
7303 /* Emit code to save SSE registers using MOV insns.
7304 First register is stored at CFA - CFA_OFFSET; slots are V4SFmode
   (16 bytes) apart.  */
7306 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
7310 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
7311 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true, true))
7313 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
7314 cfa_offset
-= GET_MODE_SIZE (V4SFmode
);
7318 static GTY(()) rtx queued_cfa_restores
;
7320 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
7321 manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
7322 Don't add the note if the previously saved value will be left untouched
7323 within stack red-zone till return, as unwinders can find the same value
7324 in the register and on the stack.  */
7327 ix86_add_cfa_restore_note (rtx_insn
*insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
/* Red-zone skip: only valid when not shrink-wrapped.  */
7329 if (!crtl
->shrink_wrapped
7330 && cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
7335 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
7336 RTX_FRAME_RELATED_P (insn
) = 1;
/* No insn to attach to yet: push onto the queued_cfa_restores list.  */
7340 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
7343 /* Add queued REG_CFA_RESTORE notes if any to INSN.  Splices the queued
   note list onto the front of INSN's notes and clears the queue.  */
7346 ix86_add_queued_cfa_restore_notes (rtx insn
)
7349 if (!queued_cfa_restores
)
/* Walk to the last queued note so its tail can be linked to INSN's
   existing notes.  */
7351 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
7353 XEXP (last
, 1) = REG_NOTES (insn
);
7354 REG_NOTES (insn
) = queued_cfa_restores
;
7355 queued_cfa_restores
= NULL_RTX
;
7356 RTX_FRAME_RELATED_P (insn
) = 1;
7359 /* Expand prologue or epilogue stack adjustment.
7360 The pattern exist to put a dependency on all ebp-based memory accesses.
7361 STYLE should be negative if instructions should be marked as frame related,
7362 zero if %r11 register is live and cannot be freely used and positive
   otherwise.  If SET_CFA, record the adjustment as a CFA change.  */
7366 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
7367 int style
, bool set_cfa
)
7369 struct machine_function
*m
= cfun
->machine
;
7370 rtx addend
= offset
;
7372 bool add_frame_related_expr
= false;
/* Offsets that don't fit an x86-64 immediate must be loaded into a
   temporary register first.  */
7374 if (!x86_64_immediate_operand (offset
, Pmode
))
7376 /* r11 is used by indirect sibcall return as well, set before the
7377 epilogue and used after the epilogue.  */
7379 addend
= gen_rtx_REG (Pmode
, R11_REG
);
/* When %r11 is unavailable, borrow the hard frame pointer — but only
   if neither operand of the adjustment is the frame pointer itself.  */
7382 gcc_assert (src
!= hard_frame_pointer_rtx
7383 && dest
!= hard_frame_pointer_rtx
);
7384 addend
= hard_frame_pointer_rtx
;
7386 emit_insn (gen_rtx_SET (addend
, offset
));
7388 add_frame_related_expr
= true;
7391 insn
= emit_insn (gen_pro_epilogue_adjust_stack_add
7392 (Pmode
, dest
, src
, addend
));
7394 ix86_add_queued_cfa_restore_notes (insn
);
7400 gcc_assert (m
->fs
.cfa_reg
== src
);
7401 m
->fs
.cfa_offset
+= INTVAL (offset
);
7402 m
->fs
.cfa_reg
= dest
;
7404 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
7405 r
= gen_rtx_SET (dest
, r
);
7406 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
7407 RTX_FRAME_RELATED_P (insn
) = 1;
7411 RTX_FRAME_RELATED_P (insn
) = 1;
7412 if (add_frame_related_expr
)
7414 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
7415 r
= gen_rtx_SET (dest
, r
);
7416 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
/* Track the machine frame state when the stack pointer itself moved.  */
7420 if (dest
== stack_pointer_rtx
)
7422 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
7423 bool valid
= m
->fs
.sp_valid
;
7424 bool realigned
= m
->fs
.sp_realigned
;
7426 if (src
== hard_frame_pointer_rtx
)
7428 valid
= m
->fs
.fp_valid
;
7430 ooffset
= m
->fs
.fp_offset
;
7432 else if (src
== crtl
->drap_reg
)
7434 valid
= m
->fs
.drap_valid
;
7440 /* Else there are two possibilities: SP itself, which we set
7441 up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
7442 taken care of this by hand along the eh_return path.  */
7443 gcc_checking_assert (src
== stack_pointer_rtx
7444 || offset
== const0_rtx
);
7447 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
7448 m
->fs
.sp_valid
= valid
;
7449 m
->fs
.sp_realigned
= realigned
;
7454 /* Find an available register to be used as dynamic realign argument
7455 pointer register.  Such a register will be written in prologue and
7456 used at the beginning of the body, so it must not be
7457 1. parameter passing register.
7459 We reuse static-chain register if it is available.  Otherwise, we
7460 use DI for i386 and R13 for x86-64.  We chose R13 since it has
   a longer encoding (making reuse of shorter-encoded registers
   elsewhere more profitable — elided rationale, see full source).
7463 Return: the regno of chosen register.  */
7466 find_drap_reg (void)
7468 tree decl
= cfun
->decl
;
7470 /* Always use callee-saved register if there are no caller-saved
   registers available (interrupt handlers, etc.).  */
7474 /* Use R13 for nested function or function need static chain.
7475 Since function with tail call may use any caller-saved
7476 registers in epilogue, DRAP must not use caller-saved
7477 register in such case.  */
7478 if (DECL_STATIC_CHAIN (decl
)
7479 || cfun
->machine
->no_caller_saved_registers
7480 || crtl
->tail_call_emit
)
7487 /* Use DI for nested function or function need static chain.
7488 Since function with tail call may use any caller-saved
7489 registers in epilogue, DRAP must not use caller-saved
7490 register in such case.  */
7491 if (DECL_STATIC_CHAIN (decl
)
7492 || cfun
->machine
->no_caller_saved_registers
7493 || crtl
->tail_call_emit
7494 || crtl
->calls_eh_return
)
7497 /* Reuse static chain register if it isn't used for parameter
   passing.  */
7499 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
7501 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
7502 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
7509 /* Return minimum incoming stack alignment (in bits).  SIBCALL is true
   when checking for a sibling call.  Starts from the strictest source
   (interrupt handler / user option / default), then clamps by the
   force_align_arg_pointer attribute, the parameter stack boundary,
   and the special rule for main().  */
7512 ix86_minimum_incoming_stack_boundary (bool sibcall
)
7514 unsigned int incoming_stack_boundary
;
7516 /* Stack of interrupt handler is aligned to 128 bits in 64bit mode.  */
7517 if (cfun
->machine
->func_type
!= TYPE_NORMAL
)
7518 incoming_stack_boundary
= TARGET_64BIT
? 128 : MIN_STACK_BOUNDARY
;
7519 /* Prefer the one specified at command line.  */
7520 else if (ix86_user_incoming_stack_boundary
)
7521 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
7522 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
7523 if -mstackrealign is used, it isn't used for sibcall check and
7524 estimated stack alignment is 128bit.  */
7526 && ix86_force_align_arg_pointer
7527 && crtl
->stack_alignment_estimated
== 128)
7528 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
7530 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
7532 /* Incoming stack alignment can be changed on individual functions
7533 via force_align_arg_pointer attribute.  We use the smallest
7534 incoming stack boundary.  */
7535 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
7536 && lookup_attribute ("force_align_arg_pointer",
7537 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
7538 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
7540 /* The incoming stack frame has to be aligned at least at
7541 parm_stack_boundary.  */
7542 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
7543 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
7545 /* Stack at entrance of main is aligned by runtime.  We use the
7546 smallest incoming stack boundary.  */
7547 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
7548 && DECL_NAME (current_function_decl
)
7549 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
7550 && DECL_FILE_SCOPE_P (current_function_decl
))
7551 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
7553 return incoming_stack_boundary
;
7556 /* Update incoming stack boundary and estimated stack alignment for the
   current function, raising the estimates to 128 bits where varargs
   register saves or TLS descriptor calls require it.  */
7559 ix86_update_stack_boundary (void)
7561 ix86_incoming_stack_boundary
7562 = ix86_minimum_incoming_stack_boundary (false);
7564 /* x86_64 vararg needs 16byte stack alignment for register save area.  */
7567 && crtl
->stack_alignment_estimated
< 128)
7568 crtl
->stack_alignment_estimated
= 128;
7570 /* __tls_get_addr needs to be called with 16-byte aligned stack.  */
7571 if (ix86_tls_descriptor_calls_expanded_in_cfun
7572 && crtl
->preferred_stack_boundary
< 128)
7573 crtl
->preferred_stack_boundary
= 128;
7576 /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
7577 needed or an rtx for DRAP otherwise.  */
7580 ix86_get_drap_rtx (void)
7582 /* We must use DRAP if there are outgoing arguments on stack or
7583 the stack pointer register is clobbered by an asm statement and
7584 ACCUMULATE_OUTGOING_ARGS is false.  */
7586 || ((cfun
->machine
->outgoing_args_on_stack
7587 || crtl
->sp_is_clobbered_by_asm
)
7588 && !ACCUMULATE_OUTGOING_ARGS
))
7589 crtl
->need_drap
= true;
7591 if (stack_realign_drap
)
7593 /* Assign DRAP to vDRAP and returns vDRAP.  */
7594 unsigned int regno
= find_drap_reg ();
7597 rtx_insn
*seq
, *insn
;
7599 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
7600 crtl
->drap_reg
= arg_ptr
;
/* Copy the DRAP into a pseudo so it survives past the prologue.  */
7603 drap_vreg
= copy_to_reg (arg_ptr
);
/* Emit the copy right after the function entry point.  */
7607 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
7610 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
7611 RTX_FRAME_RELATED_P (insn
) = 1;
7619 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7622 ix86_internal_arg_pointer (void)
7624 return virtual_incoming_args_rtx
;
7627 struct scratch_reg
{
7632 /* Return a short-lived scratch register for use on function entry.
7633 In 32-bit mode, it is valid only after the registers are saved
7634 in the prologue. This register must be released by means of
7635 release_scratch_register_on_entry once it is dead. */
7638 get_scratch_register_on_entry (struct scratch_reg
*sr
)
7646 /* We always use R11 in 64-bit mode. */
7651 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
7653 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
7655 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
7656 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
7657 int regparm
= ix86_function_regparm (fntype
, decl
);
7659 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
7661 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
7662 for the static chain register. */
7663 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
7664 && drap_regno
!= AX_REG
)
7666 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
7667 for the static chain register. */
7668 else if (thiscall_p
&& !static_chain_p
&& drap_regno
!= AX_REG
)
7670 else if (regparm
< 2 && !thiscall_p
&& drap_regno
!= DX_REG
)
7672 /* ecx is the static chain register. */
7673 else if (regparm
< 3 && !fastcall_p
&& !thiscall_p
7675 && drap_regno
!= CX_REG
)
7677 else if (ix86_save_reg (BX_REG
, true, false))
7679 /* esi is the static chain register. */
7680 else if (!(regparm
== 3 && static_chain_p
)
7681 && ix86_save_reg (SI_REG
, true, false))
7683 else if (ix86_save_reg (DI_REG
, true, false))
7687 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
7692 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
7695 rtx_insn
*insn
= emit_insn (gen_push (sr
->reg
));
7696 RTX_FRAME_RELATED_P (insn
) = 1;
7700 /* Release a scratch register obtained from the preceding function.
7702 If RELEASE_VIA_POP is true, we just pop the register off the stack
7703 to release it. This is what non-Linux systems use with -fstack-check.
7705 Otherwise we use OFFSET to locate the saved register and the
7706 allocated stack space becomes part of the local frame and is
7707 deallocated by the epilogue. */
7710 release_scratch_register_on_entry (struct scratch_reg
*sr
, HOST_WIDE_INT offset
,
7711 bool release_via_pop
)
7715 if (release_via_pop
)
7717 struct machine_function
*m
= cfun
->machine
;
7718 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
7720 /* The RX FRAME_RELATED_P mechanism doesn't know about pop. */
7721 RTX_FRAME_RELATED_P (insn
) = 1;
7722 x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
7723 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
7724 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
7725 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
7729 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
7730 x
= gen_rtx_SET (sr
->reg
, gen_rtx_MEM (word_mode
, x
));
7736 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
7738 If INT_REGISTERS_SAVED is true, then integer registers have already been
7739 pushed on the stack.
7741 If PROTECTION AREA is true, then probe PROBE_INTERVAL plus a small dope
7744 This assumes no knowledge of the current probing state, i.e. it is never
7745 allowed to allocate more than PROBE_INTERVAL bytes of stack space without
7746 a suitable probe. */
7749 ix86_adjust_stack_and_probe (HOST_WIDE_INT size
,
7750 const bool int_registers_saved
,
7751 const bool protection_area
)
7753 struct machine_function
*m
= cfun
->machine
;
7755 /* If this function does not statically allocate stack space, then
7756 no probes are needed. */
7759 /* However, the allocation of space via pushes for register
7760 saves could be viewed as allocating space, but without the
7762 if (m
->frame
.nregs
|| m
->frame
.nsseregs
|| frame_pointer_needed
)
7763 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME
, true);
7765 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME
, false);
7769 /* If we are a noreturn function, then we have to consider the
7770 possibility that we're called via a jump rather than a call.
7772 Thus we don't have the implicit probe generated by saving the
7773 return address into the stack at the call. Thus, the stack
7774 pointer could be anywhere in the guard page. The safe thing
7775 to do is emit a probe now.
7777 The probe can be avoided if we have already emitted any callee
7778 register saves into the stack or have a frame pointer (which will
7779 have been saved as well). Those saves will function as implicit
7782 ?!? This should be revamped to work like aarch64 and s390 where
7783 we track the offset from the most recent probe. Normally that
7784 offset would be zero. For a noreturn function we would reset
7785 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
7786 we just probe when we cross PROBE_INTERVAL. */
7787 if (TREE_THIS_VOLATILE (cfun
->decl
)
7788 && !(m
->frame
.nregs
|| m
->frame
.nsseregs
|| frame_pointer_needed
))
7790 /* We can safely use any register here since we're just going to push
7791 its value and immediately pop it back. But we do try and avoid
7792 argument passing registers so as not to introduce dependencies in
7793 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
7794 rtx dummy_reg
= gen_rtx_REG (word_mode
, TARGET_64BIT
? AX_REG
: SI_REG
);
7795 rtx_insn
*insn_push
= emit_insn (gen_push (dummy_reg
));
7796 rtx_insn
*insn_pop
= emit_insn (gen_pop (dummy_reg
));
7797 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
7798 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
7800 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
7801 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
7802 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
7803 add_reg_note (insn_push
, REG_CFA_ADJUST_CFA
, x
);
7804 RTX_FRAME_RELATED_P (insn_push
) = 1;
7805 x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
7806 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
7807 add_reg_note (insn_pop
, REG_CFA_ADJUST_CFA
, x
);
7808 RTX_FRAME_RELATED_P (insn_pop
) = 1;
7810 emit_insn (gen_blockage ());
7813 const HOST_WIDE_INT probe_interval
= get_probe_interval ();
7814 const int dope
= 4 * UNITS_PER_WORD
;
7816 /* If there is protection area, take it into account in the size. */
7817 if (protection_area
)
7818 size
+= probe_interval
+ dope
;
7820 /* If we allocate less than the size of the guard statically,
7821 then no probing is necessary, but we do need to allocate
7823 else if (size
< (1 << param_stack_clash_protection_guard_size
))
7825 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
7826 GEN_INT (-size
), -1,
7827 m
->fs
.cfa_reg
== stack_pointer_rtx
);
7828 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME
, true);
7832 /* We're allocating a large enough stack frame that we need to
7833 emit probes. Either emit them inline or in a loop depending
7835 if (size
<= 4 * probe_interval
)
7838 for (i
= probe_interval
; i
<= size
; i
+= probe_interval
)
7840 /* Allocate PROBE_INTERVAL bytes. */
7842 = pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
7843 GEN_INT (-probe_interval
), -1,
7844 m
->fs
.cfa_reg
== stack_pointer_rtx
);
7845 add_reg_note (insn
, REG_STACK_CHECK
, const0_rtx
);
7847 /* And probe at *sp. */
7848 emit_stack_probe (stack_pointer_rtx
);
7849 emit_insn (gen_blockage ());
7852 /* We need to allocate space for the residual, but we do not need
7853 to probe the residual... */
7854 HOST_WIDE_INT residual
= (i
- probe_interval
- size
);
7857 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
7858 GEN_INT (residual
), -1,
7859 m
->fs
.cfa_reg
== stack_pointer_rtx
);
7861 /* ...except if there is a protection area to maintain. */
7862 if (protection_area
)
7863 emit_stack_probe (stack_pointer_rtx
);
7866 dump_stack_clash_frame_info (PROBE_INLINE
, residual
!= 0);
7870 /* We expect the GP registers to be saved when probes are used
7871 as the probing sequences might need a scratch register and
7872 the routine to allocate one assumes the integer registers
7873 have already been saved. */
7874 gcc_assert (int_registers_saved
);
7876 struct scratch_reg sr
;
7877 get_scratch_register_on_entry (&sr
);
7879 /* If we needed to save a register, then account for any space
7880 that was pushed (we are not going to pop the register when
7881 we do the restore). */
7883 size
-= UNITS_PER_WORD
;
7885 /* Step 1: round SIZE down to a multiple of the interval. */
7886 HOST_WIDE_INT rounded_size
= size
& -probe_interval
;
7888 /* Step 2: compute final value of the loop counter. Use lea if
7890 rtx addr
= plus_constant (Pmode
, stack_pointer_rtx
, -rounded_size
);
7892 if (address_no_seg_operand (addr
, Pmode
))
7893 insn
= emit_insn (gen_rtx_SET (sr
.reg
, addr
));
7896 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
7897 insn
= emit_insn (gen_rtx_SET (sr
.reg
,
7898 gen_rtx_PLUS (Pmode
, sr
.reg
,
7899 stack_pointer_rtx
)));
7901 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
7903 add_reg_note (insn
, REG_CFA_DEF_CFA
,
7904 plus_constant (Pmode
, sr
.reg
,
7905 m
->fs
.cfa_offset
+ rounded_size
));
7906 RTX_FRAME_RELATED_P (insn
) = 1;
7909 /* Step 3: the loop. */
7910 rtx size_rtx
= GEN_INT (rounded_size
);
7911 insn
= emit_insn (gen_adjust_stack_and_probe (Pmode
, sr
.reg
, sr
.reg
,
7913 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
7915 m
->fs
.cfa_offset
+= rounded_size
;
7916 add_reg_note (insn
, REG_CFA_DEF_CFA
,
7917 plus_constant (Pmode
, stack_pointer_rtx
,
7919 RTX_FRAME_RELATED_P (insn
) = 1;
7921 m
->fs
.sp_offset
+= rounded_size
;
7922 emit_insn (gen_blockage ());
7924 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
7925 is equal to ROUNDED_SIZE. */
7927 if (size
!= rounded_size
)
7929 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
7930 GEN_INT (rounded_size
- size
), -1,
7931 m
->fs
.cfa_reg
== stack_pointer_rtx
);
7933 if (protection_area
)
7934 emit_stack_probe (stack_pointer_rtx
);
7937 dump_stack_clash_frame_info (PROBE_LOOP
, size
!= rounded_size
);
7939 /* This does not deallocate the space reserved for the scratch
7940 register. That will be deallocated in the epilogue. */
7941 release_scratch_register_on_entry (&sr
, size
, false);
7944 /* Adjust back to account for the protection area. */
7945 if (protection_area
)
7946 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
7947 GEN_INT (probe_interval
+ dope
), -1,
7948 m
->fs
.cfa_reg
== stack_pointer_rtx
);
7950 /* Make sure nothing is scheduled before we are done. */
7951 emit_insn (gen_blockage ());
7954 /* Adjust the stack pointer up to REG while probing it. */
7957 output_adjust_stack_and_probe (rtx reg
)
7959 static int labelno
= 0;
7963 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
7966 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
7968 /* SP = SP + PROBE_INTERVAL. */
7969 xops
[0] = stack_pointer_rtx
;
7970 xops
[1] = GEN_INT (get_probe_interval ());
7971 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
7974 xops
[1] = const0_rtx
;
7975 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
7977 /* Test if SP == LAST_ADDR. */
7978 xops
[0] = stack_pointer_rtx
;
7980 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
7983 fputs ("\tjne\t", asm_out_file
);
7984 assemble_name_raw (asm_out_file
, loop_lab
);
7985 fputc ('\n', asm_out_file
);
7990 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
7991 inclusive. These are offsets from the current stack pointer.
7993 INT_REGISTERS_SAVED is true if integer registers have already been
7994 pushed on the stack. */
7997 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
7998 const bool int_registers_saved
)
8000 const HOST_WIDE_INT probe_interval
= get_probe_interval ();
8002 /* See if we have a constant small number of probes to generate. If so,
8003 that's the easy case. The run-time loop is made up of 6 insns in the
8004 generic case while the compile-time loop is made up of n insns for n #
8006 if (size
<= 6 * probe_interval
)
8010 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
8011 it exceeds SIZE. If only one probe is needed, this will not
8012 generate any code. Then probe at FIRST + SIZE. */
8013 for (i
= probe_interval
; i
< size
; i
+= probe_interval
)
8014 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
8017 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
8021 /* Otherwise, do the same as above, but in a loop. Note that we must be
8022 extra careful with variables wrapping around because we might be at
8023 the very top (or the very bottom) of the address space and we have
8024 to be able to handle this case properly; in particular, we use an
8025 equality test for the loop condition. */
8028 /* We expect the GP registers to be saved when probes are used
8029 as the probing sequences might need a scratch register and
8030 the routine to allocate one assumes the integer registers
8031 have already been saved. */
8032 gcc_assert (int_registers_saved
);
8034 HOST_WIDE_INT rounded_size
, last
;
8035 struct scratch_reg sr
;
8037 get_scratch_register_on_entry (&sr
);
8040 /* Step 1: round SIZE to the previous multiple of the interval. */
8042 rounded_size
= ROUND_DOWN (size
, probe_interval
);
8045 /* Step 2: compute initial and final value of the loop counter. */
8047 /* TEST_OFFSET = FIRST. */
8048 emit_move_insn (sr
.reg
, GEN_INT (-first
));
8050 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
8051 last
= first
+ rounded_size
;
8058 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
8061 while (TEST_ADDR != LAST_ADDR)
8063 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
8064 until it is equal to ROUNDED_SIZE. */
8067 (gen_probe_stack_range (Pmode
, sr
.reg
, sr
.reg
, GEN_INT (-last
)));
8070 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
8071 that SIZE is equal to ROUNDED_SIZE. */
8073 if (size
!= rounded_size
)
8074 emit_stack_probe (plus_constant (Pmode
,
8075 gen_rtx_PLUS (Pmode
,
8078 rounded_size
- size
));
8080 release_scratch_register_on_entry (&sr
, size
, true);
8083 /* Make sure nothing is scheduled before we are done. */
8084 emit_insn (gen_blockage ());
8087 /* Probe a range of stack addresses from REG to END, inclusive. These are
8088 offsets from the current stack pointer. */
8091 output_probe_stack_range (rtx reg
, rtx end
)
8093 static int labelno
= 0;
8097 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
8100 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
8102 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
8104 xops
[1] = GEN_INT (get_probe_interval ());
8105 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
8107 /* Probe at TEST_ADDR. */
8108 xops
[0] = stack_pointer_rtx
;
8110 xops
[2] = const0_rtx
;
8111 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
8113 /* Test if TEST_ADDR == LAST_ADDR. */
8116 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
8119 fputs ("\tjne\t", asm_out_file
);
8120 assemble_name_raw (asm_out_file
, loop_lab
);
8121 fputc ('\n', asm_out_file
);
8126 /* Set stack_frame_required to false if stack frame isn't required.
8127 Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
8128 slot used if stack frame is required and CHECK_STACK_SLOT is true. */
8131 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment
,
8132 bool check_stack_slot
)
8134 HARD_REG_SET set_up_by_prologue
, prologue_used
;
8137 CLEAR_HARD_REG_SET (prologue_used
);
8138 CLEAR_HARD_REG_SET (set_up_by_prologue
);
8139 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
8140 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
8141 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
8142 HARD_FRAME_POINTER_REGNUM
);
8144 /* The preferred stack alignment is the minimum stack alignment. */
8145 if (stack_alignment
> crtl
->preferred_stack_boundary
)
8146 stack_alignment
= crtl
->preferred_stack_boundary
;
8148 bool require_stack_frame
= false;
8150 FOR_EACH_BB_FN (bb
, cfun
)
8153 FOR_BB_INSNS (bb
, insn
)
8154 if (NONDEBUG_INSN_P (insn
)
8155 && requires_stack_frame_p (insn
, prologue_used
,
8156 set_up_by_prologue
))
8158 require_stack_frame
= true;
8160 if (check_stack_slot
)
8162 /* Find the maximum stack alignment. */
8163 subrtx_iterator::array_type array
;
8164 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
8166 && (reg_mentioned_p (stack_pointer_rtx
,
8168 || reg_mentioned_p (frame_pointer_rtx
,
8171 unsigned int alignment
= MEM_ALIGN (*iter
);
8172 if (alignment
> stack_alignment
)
8173 stack_alignment
= alignment
;
8179 cfun
->machine
->stack_frame_required
= require_stack_frame
;
8182 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
8183 will guide prologue/epilogue to be generated in correct form. */
8186 ix86_finalize_stack_frame_flags (void)
8188 /* Check if stack realign is really needed after reload, and
8189 stores result in cfun */
8190 unsigned int incoming_stack_boundary
8191 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
8192 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
8193 unsigned int stack_alignment
8194 = (crtl
->is_leaf
&& !ix86_current_function_calls_tls_descriptor
8195 ? crtl
->max_used_stack_slot_alignment
8196 : crtl
->stack_alignment_needed
);
8197 unsigned int stack_realign
8198 = (incoming_stack_boundary
< stack_alignment
);
8199 bool recompute_frame_layout_p
= false;
8201 if (crtl
->stack_realign_finalized
)
8203 /* After stack_realign_needed is finalized, we can't no longer
8205 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
8209 /* It is always safe to compute max_used_stack_alignment. We
8210 compute it only if 128-bit aligned load/store may be generated
8211 on misaligned stack slot which will lead to segfault. */
8212 bool check_stack_slot
8213 = (stack_realign
|| crtl
->max_used_stack_slot_alignment
>= 128);
8214 ix86_find_max_used_stack_alignment (stack_alignment
,
8217 /* If the only reason for frame_pointer_needed is that we conservatively
8218 assumed stack realignment might be needed or -fno-omit-frame-pointer
8219 is used, but in the end nothing that needed the stack alignment had
8220 been spilled nor stack access, clear frame_pointer_needed and say we
8221 don't need stack realignment.
8223 When vector register is used for piecewise move and store, we don't
8224 increase stack_alignment_needed as there is no register spill for
8225 piecewise move and store. Since stack_realign_needed is set to true
8226 by checking stack_alignment_estimated which is updated by pseudo
8227 vector register usage, we also need to check stack_realign_needed to
8228 eliminate frame pointer. */
8230 || (!flag_omit_frame_pointer
&& optimize
)
8231 || crtl
->stack_realign_needed
)
8232 && frame_pointer_needed
8234 && crtl
->sp_is_unchanging
8235 && !ix86_current_function_calls_tls_descriptor
8236 && !crtl
->accesses_prior_frames
8237 && !cfun
->calls_alloca
8238 && !crtl
->calls_eh_return
8239 /* See ira_setup_eliminable_regset for the rationale. */
8240 && !(STACK_CHECK_MOVING_SP
8243 && cfun
->can_throw_non_call_exceptions
)
8244 && !ix86_frame_pointer_required ()
8245 && ix86_get_frame_size () == 0
8246 && ix86_nsaved_sseregs () == 0
8247 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
8249 if (cfun
->machine
->stack_frame_required
)
8251 /* Stack frame is required. If stack alignment needed is less
8252 than incoming stack boundary, don't realign stack. */
8253 stack_realign
= incoming_stack_boundary
< stack_alignment
;
8256 crtl
->max_used_stack_slot_alignment
8257 = incoming_stack_boundary
;
8258 crtl
->stack_alignment_needed
8259 = incoming_stack_boundary
;
8260 /* Also update preferred_stack_boundary for leaf
8262 crtl
->preferred_stack_boundary
8263 = incoming_stack_boundary
;
8268 /* If drap has been set, but it actually isn't live at the
8269 start of the function, there is no reason to set it up. */
8272 basic_block bb
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
;
8273 if (! REGNO_REG_SET_P (DF_LR_IN (bb
),
8274 REGNO (crtl
->drap_reg
)))
8276 crtl
->drap_reg
= NULL_RTX
;
8277 crtl
->need_drap
= false;
8281 cfun
->machine
->no_drap_save_restore
= true;
8283 frame_pointer_needed
= false;
8284 stack_realign
= false;
8285 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
8286 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
8287 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
8288 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
8289 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
8290 df_finish_pass (true);
8291 df_scan_alloc (NULL
);
8293 df_compute_regs_ever_live (true);
8296 if (flag_var_tracking
)
8298 /* Since frame pointer is no longer available, replace it with
8299 stack pointer - UNITS_PER_WORD in debug insns. */
8301 for (ref
= DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM
);
8304 next
= DF_REF_NEXT_REG (ref
);
8305 if (!DF_REF_INSN_INFO (ref
))
8308 /* Make sure the next ref is for a different instruction,
8309 so that we're not affected by the rescan. */
8310 rtx_insn
*insn
= DF_REF_INSN (ref
);
8311 while (next
&& DF_REF_INSN (next
) == insn
)
8312 next
= DF_REF_NEXT_REG (next
);
8314 if (DEBUG_INSN_P (insn
))
8316 bool changed
= false;
8317 for (; ref
!= next
; ref
= DF_REF_NEXT_REG (ref
))
8319 rtx
*loc
= DF_REF_LOC (ref
);
8320 if (*loc
== hard_frame_pointer_rtx
)
8322 *loc
= plus_constant (Pmode
,
8329 df_insn_rescan (insn
);
8334 recompute_frame_layout_p
= true;
8337 else if (crtl
->max_used_stack_slot_alignment
>= 128
8338 && cfun
->machine
->stack_frame_required
)
8340 /* We don't need to realign stack. max_used_stack_alignment is
8341 used to decide how stack frame should be aligned. This is
8342 independent of any psABIs nor 32-bit vs 64-bit. */
8343 cfun
->machine
->max_used_stack_alignment
8344 = stack_alignment
/ BITS_PER_UNIT
;
8347 if (crtl
->stack_realign_needed
!= stack_realign
)
8348 recompute_frame_layout_p
= true;
8349 crtl
->stack_realign_needed
= stack_realign
;
8350 crtl
->stack_realign_finalized
= true;
8351 if (recompute_frame_layout_p
)
8352 ix86_compute_frame_layout ();
8355 /* Delete SET_GOT right after entry block if it is allocated to reg. */
8358 ix86_elim_entry_set_got (rtx reg
)
8360 basic_block bb
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
;
8361 rtx_insn
*c_insn
= BB_HEAD (bb
);
8362 if (!NONDEBUG_INSN_P (c_insn
))
8363 c_insn
= next_nonnote_nondebug_insn (c_insn
);
8364 if (c_insn
&& NONJUMP_INSN_P (c_insn
))
8366 rtx pat
= PATTERN (c_insn
);
8367 if (GET_CODE (pat
) == PARALLEL
)
8369 rtx vec
= XVECEXP (pat
, 0, 0);
8370 if (GET_CODE (vec
) == SET
8371 && XINT (XEXP (vec
, 1), 1) == UNSPEC_SET_GOT
8372 && REGNO (XEXP (vec
, 0)) == REGNO (reg
))
8373 delete_insn (c_insn
);
8379 gen_frame_set (rtx reg
, rtx frame_reg
, int offset
, bool store
)
8384 addr
= plus_constant (Pmode
, frame_reg
, offset
);
8385 mem
= gen_frame_mem (GET_MODE (reg
), offset
? addr
: frame_reg
);
8386 return gen_rtx_SET (store
? mem
: reg
, store
? reg
: mem
);
8390 gen_frame_load (rtx reg
, rtx frame_reg
, int offset
)
8392 return gen_frame_set (reg
, frame_reg
, offset
, false);
8396 gen_frame_store (rtx reg
, rtx frame_reg
, int offset
)
8398 return gen_frame_set (reg
, frame_reg
, offset
, true);
8402 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame
&frame
)
8404 struct machine_function
*m
= cfun
->machine
;
8405 const unsigned ncregs
= NUM_X86_64_MS_CLOBBERED_REGS
8406 + m
->call_ms2sysv_extra_regs
;
8407 rtvec v
= rtvec_alloc (ncregs
+ 1);
8408 unsigned int align
, i
, vi
= 0;
8411 rtx rax
= gen_rtx_REG (word_mode
, AX_REG
);
8412 const class xlogue_layout
&xlogue
= xlogue_layout::get_instance ();
8414 /* AL should only be live with sysv_abi. */
8415 gcc_assert (!ix86_eax_live_at_start_p ());
8416 gcc_assert (m
->fs
.sp_offset
>= frame
.sse_reg_save_offset
);
8418 /* Setup RAX as the stub's base pointer. We use stack_realign_offset rather
8419 we've actually realigned the stack or not. */
8420 align
= GET_MODE_ALIGNMENT (V4SFmode
);
8421 addr
= choose_baseaddr (frame
.stack_realign_offset
8422 + xlogue
.get_stub_ptr_offset (), &align
, AX_REG
);
8423 gcc_assert (align
>= GET_MODE_ALIGNMENT (V4SFmode
));
8425 emit_insn (gen_rtx_SET (rax
, addr
));
8427 /* Get the stub symbol. */
8428 sym
= xlogue
.get_stub_rtx (frame_pointer_needed
? XLOGUE_STUB_SAVE_HFP
8429 : XLOGUE_STUB_SAVE
);
8430 RTVEC_ELT (v
, vi
++) = gen_rtx_USE (VOIDmode
, sym
);
8432 for (i
= 0; i
< ncregs
; ++i
)
8434 const xlogue_layout::reginfo
&r
= xlogue
.get_reginfo (i
);
8435 rtx reg
= gen_rtx_REG ((SSE_REGNO_P (r
.regno
) ? V4SFmode
: word_mode
),
8437 RTVEC_ELT (v
, vi
++) = gen_frame_store (reg
, rax
, -r
.offset
);
8440 gcc_assert (vi
== (unsigned)GET_NUM_ELEM (v
));
8442 insn
= emit_insn (gen_rtx_PARALLEL (VOIDmode
, v
));
8443 RTX_FRAME_RELATED_P (insn
) = true;
8446 /* Generate and return an insn body to AND X with Y. */
8449 gen_and2_insn (rtx x
, rtx y
)
8451 enum insn_code icode
= optab_handler (and_optab
, GET_MODE (x
));
8453 gcc_assert (insn_operand_matches (icode
, 0, x
));
8454 gcc_assert (insn_operand_matches (icode
, 1, x
));
8455 gcc_assert (insn_operand_matches (icode
, 2, y
));
8457 return GEN_FCN (icode
) (x
, x
, y
);
8460 /* Expand the prologue into a bunch of separate insns. */
8463 ix86_expand_prologue (void)
8465 struct machine_function
*m
= cfun
->machine
;
8467 HOST_WIDE_INT allocate
;
8468 bool int_registers_saved
;
8469 bool sse_registers_saved
;
8470 bool save_stub_call_needed
;
8471 rtx static_chain
= NULL_RTX
;
8473 ix86_last_zero_store_uid
= 0;
8474 if (ix86_function_naked (current_function_decl
))
8476 if (flag_stack_usage_info
)
8477 current_function_static_stack_size
= 0;
8481 ix86_finalize_stack_frame_flags ();
8483 /* DRAP should not coexist with stack_realign_fp */
8484 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
8486 memset (&m
->fs
, 0, sizeof (m
->fs
));
8488 /* Initialize CFA state for before the prologue. */
8489 m
->fs
.cfa_reg
= stack_pointer_rtx
;
8490 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
8492 /* Track SP offset to the CFA. We continue tracking this after we've
8493 swapped the CFA register away from SP. In the case of re-alignment
8494 this is fudged; we're interested to offsets within the local frame. */
8495 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
8496 m
->fs
.sp_valid
= true;
8497 m
->fs
.sp_realigned
= false;
8499 const struct ix86_frame
&frame
= cfun
->machine
->frame
;
8501 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
8503 /* We should have already generated an error for any use of
8504 ms_hook on a nested function. */
8505 gcc_checking_assert (!ix86_static_chain_on_stack
);
8507 /* Check if profiling is active and we shall use profiling before
8508 prologue variant. If so sorry. */
8509 if (crtl
->profile
&& flag_fentry
!= 0)
8510 sorry ("%<ms_hook_prologue%> attribute is not compatible "
8511 "with %<-mfentry%> for 32-bit");
8513 /* In ix86_asm_output_function_label we emitted:
8514 8b ff movl.s %edi,%edi
8516 8b ec movl.s %esp,%ebp
8518 This matches the hookable function prologue in Win32 API
8519 functions in Microsoft Windows XP Service Pack 2 and newer.
8520 Wine uses this to enable Windows apps to hook the Win32 API
8521 functions provided by Wine.
8523 What that means is that we've already set up the frame pointer. */
8525 if (frame_pointer_needed
8526 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
8530 /* We've decided to use the frame pointer already set up.
8531 Describe this to the unwinder by pretending that both
8532 push and mov insns happen right here.
8534 Putting the unwind info here at the end of the ms_hook
8535 is done so that we can make absolutely certain we get
8536 the required byte sequence at the start of the function,
8537 rather than relying on an assembler that can produce
8538 the exact encoding required.
8540 However it does mean (in the unpatched case) that we have
8541 a 1 insn window where the asynchronous unwind info is
8542 incorrect. However, if we placed the unwind info at
8543 its correct location we would have incorrect unwind info
8544 in the patched case. Which is probably all moot since
8545 I don't expect Wine generates dwarf2 unwind info for the
8546 system libraries that use this feature. */
8548 insn
= emit_insn (gen_blockage ());
8550 push
= gen_push (hard_frame_pointer_rtx
);
8551 mov
= gen_rtx_SET (hard_frame_pointer_rtx
,
8553 RTX_FRAME_RELATED_P (push
) = 1;
8554 RTX_FRAME_RELATED_P (mov
) = 1;
8556 RTX_FRAME_RELATED_P (insn
) = 1;
8557 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
8558 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
8560 /* Note that gen_push incremented m->fs.cfa_offset, even
8561 though we didn't emit the push insn here. */
8562 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
8563 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
8564 m
->fs
.fp_valid
= true;
8568 /* The frame pointer is not needed so pop %ebp again.
8569 This leaves us with a pristine state. */
8570 emit_insn (gen_pop (hard_frame_pointer_rtx
));
8574 /* The first insn of a function that accepts its static chain on the
8575 stack is to push the register that would be filled in by a direct
8576 call. This insn will be skipped by the trampoline. */
8577 else if (ix86_static_chain_on_stack
)
8579 static_chain
= ix86_static_chain (cfun
->decl
, false);
8580 insn
= emit_insn (gen_push (static_chain
));
8581 emit_insn (gen_blockage ());
8583 /* We don't want to interpret this push insn as a register save,
8584 only as a stack adjustment. The real copy of the register as
8585 a save will be done later, if needed. */
8586 t
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
8587 t
= gen_rtx_SET (stack_pointer_rtx
, t
);
8588 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
8589 RTX_FRAME_RELATED_P (insn
) = 1;
8592 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
8593 of DRAP is needed and stack realignment is really needed after reload */
8594 if (stack_realign_drap
)
8596 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
8598 /* Can't use DRAP in interrupt function. */
8599 if (cfun
->machine
->func_type
!= TYPE_NORMAL
)
8600 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
8601 "in interrupt service routine. This may be worked "
8602 "around by avoiding functions with aggregate return.");
8604 /* Only need to push parameter pointer reg if it is caller saved. */
8605 if (!call_used_or_fixed_reg_p (REGNO (crtl
->drap_reg
)))
8607 /* Push arg pointer reg */
8608 insn
= emit_insn (gen_push (crtl
->drap_reg
));
8609 RTX_FRAME_RELATED_P (insn
) = 1;
8612 /* Grab the argument pointer. */
8613 t
= plus_constant (Pmode
, stack_pointer_rtx
, m
->fs
.sp_offset
);
8614 insn
= emit_insn (gen_rtx_SET (crtl
->drap_reg
, t
));
8615 RTX_FRAME_RELATED_P (insn
) = 1;
8616 m
->fs
.cfa_reg
= crtl
->drap_reg
;
8617 m
->fs
.cfa_offset
= 0;
8619 /* Align the stack. */
8620 insn
= emit_insn (gen_and2_insn (stack_pointer_rtx
,
8621 GEN_INT (-align_bytes
)));
8622 RTX_FRAME_RELATED_P (insn
) = 1;
8624 /* Replicate the return address on the stack so that return
8625 address can be reached via (argp - 1) slot. This is needed
8626 to implement macro RETURN_ADDR_RTX and intrinsic function
8627 expand_builtin_return_addr etc. */
8628 t
= plus_constant (Pmode
, crtl
->drap_reg
, -UNITS_PER_WORD
);
8629 t
= gen_frame_mem (word_mode
, t
);
8630 insn
= emit_insn (gen_push (t
));
8631 RTX_FRAME_RELATED_P (insn
) = 1;
8633 /* For the purposes of frame and register save area addressing,
8634 we've started over with a new frame. */
8635 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
8636 m
->fs
.realigned
= true;
8640 /* Replicate static chain on the stack so that static chain
8641 can be reached via (argp - 2) slot. This is needed for
8642 nested function with stack realignment. */
8643 insn
= emit_insn (gen_push (static_chain
));
8644 RTX_FRAME_RELATED_P (insn
) = 1;
8648 int_registers_saved
= (frame
.nregs
== 0);
8649 sse_registers_saved
= (frame
.nsseregs
== 0);
8650 save_stub_call_needed
= (m
->call_ms2sysv
);
8651 gcc_assert (sse_registers_saved
|| !save_stub_call_needed
);
8653 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
8655 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8656 slower on all targets. Also sdb didn't like it. */
8657 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
8658 RTX_FRAME_RELATED_P (insn
) = 1;
8660 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
8662 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
8663 RTX_FRAME_RELATED_P (insn
) = 1;
8665 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8666 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
8667 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
8668 m
->fs
.fp_valid
= true;
8672 if (!int_registers_saved
)
8674 /* If saving registers via PUSH, do so now. */
8675 if (!frame
.save_regs_using_mov
)
8677 ix86_emit_save_regs ();
8678 int_registers_saved
= true;
8679 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
8682 /* When using red zone we may start register saving before allocating
8683 the stack frame saving one cycle of the prologue. However, avoid
8684 doing this if we have to probe the stack; at least on x86_64 the
8685 stack probe can turn into a call that clobbers a red zone location. */
8686 else if (ix86_using_red_zone ()
8687 && (! TARGET_STACK_PROBE
8688 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
8690 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
8691 cfun
->machine
->red_zone_used
= true;
8692 int_registers_saved
= true;
8696 if (frame
.red_zone_size
!= 0)
8697 cfun
->machine
->red_zone_used
= true;
8699 if (stack_realign_fp
)
8701 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
8702 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
8704 /* Record last valid frame pointer offset. */
8705 m
->fs
.sp_realigned_fp_last
= frame
.reg_save_offset
;
8707 /* The computation of the size of the re-aligned stack frame means
8708 that we must allocate the size of the register save area before
8709 performing the actual alignment. Otherwise we cannot guarantee
8710 that there's enough storage above the realignment point. */
8711 allocate
= frame
.reg_save_offset
- m
->fs
.sp_offset
8712 + frame
.stack_realign_allocate
;
8714 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8715 GEN_INT (-allocate
), -1, false);
8717 /* Align the stack. */
8718 emit_insn (gen_and2_insn (stack_pointer_rtx
, GEN_INT (-align_bytes
)));
8719 m
->fs
.sp_offset
= ROUND_UP (m
->fs
.sp_offset
, align_bytes
);
8720 m
->fs
.sp_realigned_offset
= m
->fs
.sp_offset
8721 - frame
.stack_realign_allocate
;
8722 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
8723 Beyond this point, stack access should be done via choose_baseaddr or
8724 by using sp_valid_at and fp_valid_at to determine the correct base
8725 register. Henceforth, any CFA offset should be thought of as logical
8726 and not physical. */
8727 gcc_assert (m
->fs
.sp_realigned_offset
>= m
->fs
.sp_realigned_fp_last
);
8728 gcc_assert (m
->fs
.sp_realigned_offset
== frame
.stack_realign_offset
);
8729 m
->fs
.sp_realigned
= true;
8731 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
8732 is needed to describe where a register is saved using a realigned
8733 stack pointer, so we need to invalidate the stack pointer for that
8736 m
->fs
.sp_valid
= false;
8738 /* If SP offset is non-immediate after allocation of the stack frame,
8739 then emit SSE saves or stub call prior to allocating the rest of the
8740 stack frame. This is less efficient for the out-of-line stub because
8741 we can't combine allocations across the call barrier, but it's better
8742 than using a scratch register. */
8743 else if (!x86_64_immediate_operand (GEN_INT (frame
.stack_pointer_offset
8744 - m
->fs
.sp_realigned_offset
),
8747 if (!sse_registers_saved
)
8749 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
8750 sse_registers_saved
= true;
8752 else if (save_stub_call_needed
)
8754 ix86_emit_outlined_ms2sysv_save (frame
);
8755 save_stub_call_needed
= false;
8760 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
8762 if (flag_stack_usage_info
)
8764 /* We start to count from ARG_POINTER. */
8765 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
8767 /* If it was realigned, take into account the fake frame. */
8768 if (stack_realign_drap
)
8770 if (ix86_static_chain_on_stack
)
8771 stack_size
+= UNITS_PER_WORD
;
8773 if (!call_used_or_fixed_reg_p (REGNO (crtl
->drap_reg
)))
8774 stack_size
+= UNITS_PER_WORD
;
8776 /* This over-estimates by 1 minimal-stack-alignment-unit but
8777 mitigates that by counting in the new return address slot. */
8778 current_function_dynamic_stack_size
8779 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
8782 current_function_static_stack_size
= stack_size
;
8785 /* On SEH target with very large frame size, allocate an area to save
8786 SSE registers (as the very large allocation won't be described). */
8788 && frame
.stack_pointer_offset
> SEH_MAX_FRAME_SIZE
8789 && !sse_registers_saved
)
8791 HOST_WIDE_INT sse_size
8792 = frame
.sse_reg_save_offset
- frame
.reg_save_offset
;
8794 gcc_assert (int_registers_saved
);
8796 /* No need to do stack checking as the area will be immediately
8798 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8799 GEN_INT (-sse_size
), -1,
8800 m
->fs
.cfa_reg
== stack_pointer_rtx
);
8801 allocate
-= sse_size
;
8802 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
8803 sse_registers_saved
= true;
8806 /* If stack clash protection is requested, then probe the stack, unless it
8807 is already probed on the target. */
8809 && flag_stack_clash_protection
8810 && !ix86_target_stack_probe ())
8812 ix86_adjust_stack_and_probe (allocate
, int_registers_saved
, false);
8816 /* The stack has already been decremented by the instruction calling us
8817 so probe if the size is non-negative to preserve the protection area. */
8818 else if (allocate
>= 0 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
8820 const HOST_WIDE_INT probe_interval
= get_probe_interval ();
8822 if (STACK_CHECK_MOVING_SP
)
8825 && !cfun
->calls_alloca
8826 && allocate
<= probe_interval
)
8831 ix86_adjust_stack_and_probe (allocate
, int_registers_saved
, true);
8838 HOST_WIDE_INT size
= allocate
;
8840 if (TARGET_64BIT
&& size
>= HOST_WIDE_INT_C (0x80000000))
8841 size
= 0x80000000 - get_stack_check_protect () - 1;
8843 if (TARGET_STACK_PROBE
)
8845 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
8847 if (size
> probe_interval
)
8848 ix86_emit_probe_stack_range (0, size
, int_registers_saved
);
8851 ix86_emit_probe_stack_range (0,
8852 size
+ get_stack_check_protect (),
8853 int_registers_saved
);
8857 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
8859 if (size
> probe_interval
8860 && size
> get_stack_check_protect ())
8861 ix86_emit_probe_stack_range (get_stack_check_protect (),
8863 - get_stack_check_protect ()),
8864 int_registers_saved
);
8867 ix86_emit_probe_stack_range (get_stack_check_protect (), size
,
8868 int_registers_saved
);
8875 else if (!ix86_target_stack_probe ()
8876 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
8878 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8879 GEN_INT (-allocate
), -1,
8880 m
->fs
.cfa_reg
== stack_pointer_rtx
);
8884 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
8886 const bool sp_is_cfa_reg
= (m
->fs
.cfa_reg
== stack_pointer_rtx
);
8887 bool eax_live
= ix86_eax_live_at_start_p ();
8888 bool r10_live
= false;
8891 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
8895 insn
= emit_insn (gen_push (eax
));
8896 allocate
-= UNITS_PER_WORD
;
8897 /* Note that SEH directives need to continue tracking the stack
8898 pointer even after the frame pointer has been set up. */
8899 if (sp_is_cfa_reg
|| TARGET_SEH
)
8902 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
8903 RTX_FRAME_RELATED_P (insn
) = 1;
8904 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
8905 gen_rtx_SET (stack_pointer_rtx
,
8906 plus_constant (Pmode
,
8914 r10
= gen_rtx_REG (Pmode
, R10_REG
);
8915 insn
= emit_insn (gen_push (r10
));
8916 allocate
-= UNITS_PER_WORD
;
8917 if (sp_is_cfa_reg
|| TARGET_SEH
)
8920 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
8921 RTX_FRAME_RELATED_P (insn
) = 1;
8922 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
8923 gen_rtx_SET (stack_pointer_rtx
,
8924 plus_constant (Pmode
,
8930 emit_move_insn (eax
, GEN_INT (allocate
));
8931 emit_insn (gen_allocate_stack_worker_probe (Pmode
, eax
, eax
));
8933 /* Use the fact that AX still contains ALLOCATE. */
8934 insn
= emit_insn (gen_pro_epilogue_adjust_stack_sub
8935 (Pmode
, stack_pointer_rtx
, stack_pointer_rtx
, eax
));
8937 if (sp_is_cfa_reg
|| TARGET_SEH
)
8940 m
->fs
.cfa_offset
+= allocate
;
8941 RTX_FRAME_RELATED_P (insn
) = 1;
8942 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
8943 gen_rtx_SET (stack_pointer_rtx
,
8944 plus_constant (Pmode
, stack_pointer_rtx
,
8947 m
->fs
.sp_offset
+= allocate
;
8949 /* Use stack_pointer_rtx for relative addressing so that code works for
8950 realigned stack. But this means that we need a blockage to prevent
8951 stores based on the frame pointer from being scheduled before. */
8952 if (r10_live
&& eax_live
)
8954 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, eax
);
8955 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
8956 gen_frame_mem (word_mode
, t
));
8957 t
= plus_constant (Pmode
, t
, UNITS_PER_WORD
);
8958 emit_move_insn (gen_rtx_REG (word_mode
, AX_REG
),
8959 gen_frame_mem (word_mode
, t
));
8960 emit_insn (gen_memory_blockage ());
8962 else if (eax_live
|| r10_live
)
8964 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, eax
);
8965 emit_move_insn (gen_rtx_REG (word_mode
,
8966 (eax_live
? AX_REG
: R10_REG
)),
8967 gen_frame_mem (word_mode
, t
));
8968 emit_insn (gen_memory_blockage ());
8971 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
8973 /* If we havn't already set up the frame pointer, do so now. */
8974 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
8976 insn
= gen_add3_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
,
8977 GEN_INT (frame
.stack_pointer_offset
8978 - frame
.hard_frame_pointer_offset
));
8979 insn
= emit_insn (insn
);
8980 RTX_FRAME_RELATED_P (insn
) = 1;
8981 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
8983 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8984 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
8985 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
8986 m
->fs
.fp_valid
= true;
8989 if (!int_registers_saved
)
8990 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
8991 if (!sse_registers_saved
)
8992 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
8993 else if (save_stub_call_needed
)
8994 ix86_emit_outlined_ms2sysv_save (frame
);
8996 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
8998 if (!TARGET_64BIT
&& pic_offset_table_rtx
&& crtl
->profile
&& !flag_fentry
)
9000 rtx pic
= gen_rtx_REG (Pmode
, REAL_PIC_OFFSET_TABLE_REGNUM
);
9001 insn
= emit_insn (gen_set_got (pic
));
9002 RTX_FRAME_RELATED_P (insn
) = 1;
9003 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
9004 emit_insn (gen_prologue_use (pic
));
9005 /* Deleting already emmitted SET_GOT if exist and allocated to
9006 REAL_PIC_OFFSET_TABLE_REGNUM. */
9007 ix86_elim_entry_set_got (pic
);
9010 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
9012 /* vDRAP is setup but after reload it turns out stack realign
9013 isn't necessary, here we will emit prologue to setup DRAP
9014 without stack realign adjustment */
9015 t
= choose_baseaddr (0, NULL
);
9016 emit_insn (gen_rtx_SET (crtl
->drap_reg
, t
));
9019 /* Prevent instructions from being scheduled into register save push
9020 sequence when access to the redzone area is done through frame pointer.
9021 The offset between the frame pointer and the stack pointer is calculated
9022 relative to the value of the stack pointer at the end of the function
9023 prologue, and moving instructions that access redzone area via frame
9024 pointer inside push sequence violates this assumption. */
9025 if (frame_pointer_needed
&& frame
.red_zone_size
)
9026 emit_insn (gen_memory_blockage ());
9028 /* SEH requires that the prologue end within 256 bytes of the start of
9029 the function. Prevent instruction schedules that would extend that.
9030 Further, prevent alloca modifications to the stack pointer from being
9031 combined with prologue modifications. */
9033 emit_insn (gen_prologue_use (stack_pointer_rtx
));
/* Emit code to restore REG using a POP insn, and update the frame-state
   machinery (cfun->machine->fs) plus the CFI notes that describe the
   resulting CFA to the unwinder.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx_insn *insn = emit_insn (gen_pop (reg));

  /* A pop moves SP up by one word; record the restore and the new offset.  */
  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      /* The CFA tracks SP, so the pop adjusts the CFA by one word.  */
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Emit code to restore saved registers using POP insns.  Walks every hard
   general register and pops those that ix86_save_reg says were saved
   (ignoring eh-return, counting all saves).  */

static void
ix86_emit_restore_regs_using_pop (void)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
}
/* Emit code and notes for the LEAVE instruction.  If insn is non-null,
   omits the emit and only attaches the notes (used when the LEAVE has
   already been emitted as part of a larger pattern).  LEAVE restores SP
   from the frame pointer and pops the saved frame pointer, so this also
   transfers frame-state validity from FP back to SP.  */

static void
ix86_emit_leave (rtx_insn *insn)
{
  struct machine_function *m = cfun->machine;

  if (!insn)
    insn = emit_insn (gen_leave (word_mode));

  ix86_add_queued_cfa_restore_notes (insn);

  /* LEAVE is only meaningful while the frame pointer is valid.  */
  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_realigned = false;
  /* After LEAVE, SP sits one word above where FP pointed (the saved FP
     slot has been popped).  */
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      /* The CFA was tracked via the frame pointer; switch it to SP and
	 tell the unwinder via a REG_CFA_DEF_CFA note.  */
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (Pmode, stack_pointer_rtx,
				   m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
			     m->fs.fp_offset);
}
/* Emit code to restore saved general registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET; each subsequent one
   from one word lower.  MAYBE_EH_RETURN selects whether eh_return-saved
   registers are included (passed through to ix86_save_reg).  */

static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
				  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
      {
	rtx reg = gen_rtx_REG (word_mode, regno);
	rtx mem;
	rtx_insn *insn;

	/* choose_baseaddr picks whichever of SP/FP/DRAP currently
	   addresses the save slot.  */
	mem = choose_baseaddr (cfa_offset, NULL);
	mem = gen_frame_mem (word_mode, mem);
	insn = emit_move_insn (reg, mem);

	if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
	  {
	    /* Previously we'd represented the CFA as an expression
	       like *(%ebp - 8).  We've just popped that value from
	       the stack, which means we need to reset the CFA to
	       the drap register.  This will remain until we restore
	       the stack pointer.  */
	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
	    RTX_FRAME_RELATED_P (insn) = 1;

	    /* This means that the DRAP register is valid for addressing.  */
	    m->fs.drap_valid = true;
	  }
	else
	  ix86_add_cfa_restore_note (NULL, reg, cfa_offset);

	cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET; each subsequent one
   from GET_MODE_SIZE (V4SFmode) bytes lower.  MAYBE_EH_RETURN is passed
   through to ix86_save_reg.  Restores are done in V4SFmode (16-byte
   vector loads).  */

static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
				      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
      {
	rtx reg = gen_rtx_REG (V4SFmode, regno);
	rtx mem;
	unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);

	mem = choose_baseaddr (cfa_offset, &align);
	mem = gen_rtx_MEM (V4SFmode, mem);

	/* The location alignment depends upon the base register
	   choose_baseaddr selected; clamp to the mode alignment and
	   verify the slot really is suitably aligned.  */
	align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
	gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
	set_mem_align (mem, align);
	emit_insn (gen_rtx_SET (reg, mem));

	ix86_add_cfa_restore_note (NULL, reg, cfa_offset);

	cfa_offset -= GET_MODE_SIZE (V4SFmode);
      }
}
/* Emit the out-of-line (stub-based) restore sequence for the ms->sysv
   clobbered registers described by FRAME.  If USE_CALL, the stub is
   invoked with a call and control returns here; otherwise the stub is
   entered with a jump and performs the function's tail return.  STYLE is
   forwarded to pro_epilogue_adjust_stack for the final SP adjustment.  */

static void
ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
				    bool use_call, int style)
{
  struct machine_function *m = cfun->machine;
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
			  + m->call_ms2sysv_extra_regs;
  rtvec v;
  unsigned int elems_needed, align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, tmp;
  rtx rsi = gen_rtx_REG (word_mode, SI_REG);
  rtx r10 = NULL_RTX;
  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
  HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
  HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
  rtx rsi_frame_load = NULL_RTX;
  HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
  enum xlogue_stub stub;

  gcc_assert (!m->fs.fp_valid || frame_pointer_needed);

  /* If using a realigned stack, we should never start with padding.  */
  gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());

  /* Setup RSI as the stub's base pointer.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rsi, tmp));

  /* Get a symbol for the stub.  Four stub variants: call vs tail-call,
     with or without a hard frame pointer.  */
  if (frame_pointer_needed)
    stub = use_call ? XLOGUE_STUB_RESTORE_HFP
		    : XLOGUE_STUB_RESTORE_HFP_TAIL;
  else
    stub = use_call ? XLOGUE_STUB_RESTORE
		    : XLOGUE_STUB_RESTORE_TAIL;
  sym = xlogue.get_stub_rtx (stub);

  /* Size the PARALLEL: one frame load per clobbered reg, plus the USE of
     the stub symbol, and for the tail-call forms the return plus the
     SP/FP restore elements.  */
  elems_needed = ncregs;
  if (use_call)
    elems_needed += 1;
  else
    elems_needed += frame_pointer_needed ? 5 : 3;
  v = rtvec_alloc (elems_needed);

  /* We call the epilogue stub when we need to pop incoming args or we are
     doing a sibling call as the tail.  Otherwise, we will emit a jmp to the
     epilogue stub and it is the tail-call.  */
  if (use_call)
      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
  else
    {
      RTVEC_ELT (v, vi++) = ret_rtx;
      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
      if (frame_pointer_needed)
	{
	  rtx rbp = gen_rtx_REG (DImode, BP_REG);
	  gcc_assert (m->fs.fp_valid);
	  gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);

	  /* SP = RBP + 8 and RBP = [RBP]: describe the stub's LEAVE-like
	     frame teardown inside the PARALLEL.  */
	  tmp = plus_constant (DImode, rbp, 8);
	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
	  RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
	  tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
	  RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
	}
      else
	{
	  /* If no hard frame pointer, we set R10 to the SP restore value.  */
	  gcc_assert (!m->fs.fp_valid);
	  gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	  gcc_assert (m->fs.sp_valid);

	  r10 = gen_rtx_REG (DImode, R10_REG);
	  tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
	  emit_insn (gen_rtx_SET (r10, tmp));

	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
	}
    }

  /* Generate frame load insns and restore notes.  */
  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
      machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
      rtx reg, frame_load;

      reg = gen_rtx_REG (mode, r.regno);
      frame_load = gen_frame_load (reg, rsi, r.offset);

      /* Save RSI frame load insn & note to add last.  RSI is the stub's
	 base pointer, so it must be reloaded after every other slot has
	 been read through it.  */
      if (r.regno == SI_REG)
	{
	  gcc_assert (!rsi_frame_load);
	  rsi_frame_load = frame_load;
	  rsi_restore_offset = r.offset;
	}
      else
	{
	  RTVEC_ELT (v, vi++) = frame_load;
	  ix86_add_cfa_restore_note (NULL, reg, r.offset);
	}
    }

  /* Add RSI frame load & restore note at the end.  */
  gcc_assert (rsi_frame_load);
  gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
  RTVEC_ELT (v, vi++) = rsi_frame_load;
  ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
			     rsi_restore_offset);

  /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.  */
  if (!use_call && !frame_pointer_needed)
    {
      gcc_assert (m->fs.sp_valid);
      gcc_assert (!m->fs.sp_realigned);

      /* At this point, R10 should point to frame.stack_realign_offset.  */
      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
      m->fs.sp_offset = frame.stack_realign_offset;
    }

  /* Every reserved PARALLEL slot must have been filled.  */
  gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
  tmp = gen_rtx_PARALLEL (VOIDmode, v);
  if (use_call)
      insn = emit_insn (tmp);
  else
    {
      insn = emit_jump_insn (tmp);
      JUMP_LABEL (insn) = ret_rtx;

      if (frame_pointer_needed)
	ix86_emit_leave (insn);
      else
	{
	  /* Need CFA adjust note.  */
	  tmp = gen_rtx_SET (stack_pointer_rtx, r10);
	  add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
	}
    }

  RTX_FRAME_RELATED_P (insn) = true;
  ix86_add_queued_cfa_restore_notes (insn);

  /* If we're not doing a tail-call, we need to adjust the stack.  */
  if (use_call && m->fs.sp_valid)
    {
      HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (dealloc), style,
				m->fs.cfa_reg == stack_pointer_rtx);
    }
}
9368 /* Restore function stack, frame, and registers. */
9371 ix86_expand_epilogue (int style
)
9373 struct machine_function
*m
= cfun
->machine
;
9374 struct machine_frame_state frame_state_save
= m
->fs
;
9375 bool restore_regs_via_mov
;
9377 bool restore_stub_is_tail
= false;
9379 if (ix86_function_naked (current_function_decl
))
9381 /* The program should not reach this point. */
9382 emit_insn (gen_ud2 ());
9386 ix86_finalize_stack_frame_flags ();
9387 const struct ix86_frame
&frame
= cfun
->machine
->frame
;
9389 m
->fs
.sp_realigned
= stack_realign_fp
;
9390 m
->fs
.sp_valid
= stack_realign_fp
9391 || !frame_pointer_needed
9392 || crtl
->sp_is_unchanging
;
9393 gcc_assert (!m
->fs
.sp_valid
9394 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
9396 /* The FP must be valid if the frame pointer is present. */
9397 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
9398 gcc_assert (!m
->fs
.fp_valid
9399 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
9401 /* We must have *some* valid pointer to the stack frame. */
9402 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
9404 /* The DRAP is never valid at this point. */
9405 gcc_assert (!m
->fs
.drap_valid
);
9407 /* See the comment about red zone and frame
9408 pointer usage in ix86_expand_prologue. */
9409 if (frame_pointer_needed
&& frame
.red_zone_size
)
9410 emit_insn (gen_memory_blockage ());
9412 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
9413 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
9415 /* Determine the CFA offset of the end of the red-zone. */
9416 m
->fs
.red_zone_offset
= 0;
9417 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
9419 /* The red-zone begins below return address and error code in
9420 exception handler. */
9421 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ INCOMING_FRAME_SP_OFFSET
;
9423 /* When the register save area is in the aligned portion of
9424 the stack, determine the maximum runtime displacement that
9425 matches up with the aligned frame. */
9426 if (stack_realign_drap
)
9427 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
9431 HOST_WIDE_INT reg_save_offset
= frame
.reg_save_offset
;
9433 /* Special care must be taken for the normal return case of a function
9434 using eh_return: the eax and edx registers are marked as saved, but
9435 not restored along this path. Adjust the save location to match. */
9436 if (crtl
->calls_eh_return
&& style
!= 2)
9437 reg_save_offset
-= 2 * UNITS_PER_WORD
;
9439 /* EH_RETURN requires the use of moves to function properly. */
9440 if (crtl
->calls_eh_return
)
9441 restore_regs_via_mov
= true;
9442 /* SEH requires the use of pops to identify the epilogue. */
9443 else if (TARGET_SEH
)
9444 restore_regs_via_mov
= false;
9445 /* If we're only restoring one register and sp cannot be used then
9446 using a move instruction to restore the register since it's
9447 less work than reloading sp and popping the register. */
9448 else if (!sp_valid_at (frame
.hfp_save_offset
) && frame
.nregs
<= 1)
9449 restore_regs_via_mov
= true;
9450 else if (TARGET_EPILOGUE_USING_MOVE
9451 && cfun
->machine
->use_fast_prologue_epilogue
9453 || m
->fs
.sp_offset
!= reg_save_offset
))
9454 restore_regs_via_mov
= true;
9455 else if (frame_pointer_needed
9457 && m
->fs
.sp_offset
!= reg_save_offset
)
9458 restore_regs_via_mov
= true;
9459 else if (frame_pointer_needed
9461 && cfun
->machine
->use_fast_prologue_epilogue
9462 && frame
.nregs
== 1)
9463 restore_regs_via_mov
= true;
9465 restore_regs_via_mov
= false;
9467 if (restore_regs_via_mov
|| frame
.nsseregs
)
9469 /* Ensure that the entire register save area is addressable via
9470 the stack pointer, if we will restore SSE regs via sp. */
9472 && m
->fs
.sp_offset
> 0x7fffffff
9473 && sp_valid_at (frame
.stack_realign_offset
+ 1)
9474 && (frame
.nsseregs
+ frame
.nregs
) != 0)
9476 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
9477 GEN_INT (m
->fs
.sp_offset
9478 - frame
.sse_reg_save_offset
),
9480 m
->fs
.cfa_reg
== stack_pointer_rtx
);
9484 /* If there are any SSE registers to restore, then we have to do it
9485 via moves, since there's obviously no pop for SSE regs. */
9487 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
9490 if (m
->call_ms2sysv
)
9492 int pop_incoming_args
= crtl
->args
.pops_args
&& crtl
->args
.size
;
9494 /* We cannot use a tail-call for the stub if:
9495 1. We have to pop incoming args,
9496 2. We have additional int regs to restore, or
9497 3. A sibling call will be the tail-call, or
9498 4. We are emitting an eh_return_internal epilogue.
9500 TODO: Item 4 has not yet tested!
9502 If any of the above are true, we will call the stub rather than
9504 restore_stub_is_tail
= !(pop_incoming_args
|| frame
.nregs
|| style
!= 1);
9505 ix86_emit_outlined_ms2sysv_restore (frame
, !restore_stub_is_tail
, style
);
9508 /* If using out-of-line stub that is a tail-call, then...*/
9509 if (m
->call_ms2sysv
&& restore_stub_is_tail
)
9511 /* TODO: parinoid tests. (remove eventually) */
9512 gcc_assert (m
->fs
.sp_valid
);
9513 gcc_assert (!m
->fs
.sp_realigned
);
9514 gcc_assert (!m
->fs
.fp_valid
);
9515 gcc_assert (!m
->fs
.realigned
);
9516 gcc_assert (m
->fs
.sp_offset
== UNITS_PER_WORD
);
9517 gcc_assert (!crtl
->drap_reg
);
9518 gcc_assert (!frame
.nregs
);
9520 else if (restore_regs_via_mov
)
9525 ix86_emit_restore_regs_using_mov (reg_save_offset
, style
== 2);
9527 /* eh_return epilogues need %ecx added to the stack pointer. */
9530 rtx sa
= EH_RETURN_STACKADJ_RTX
;
9533 /* Stack realignment doesn't work with eh_return. */
9534 if (crtl
->stack_realign_needed
)
9535 sorry ("Stack realignment not supported with "
9536 "%<__builtin_eh_return%>");
9538 /* regparm nested functions don't work with eh_return. */
9539 if (ix86_static_chain_on_stack
)
9540 sorry ("regparm nested function not supported with "
9541 "%<__builtin_eh_return%>");
9543 if (frame_pointer_needed
)
9545 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
9546 t
= plus_constant (Pmode
, t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
9547 emit_insn (gen_rtx_SET (sa
, t
));
9549 /* NB: eh_return epilogues must restore the frame pointer
9550 in word_mode since the upper 32 bits of RBP register
9551 can have any values. */
9552 t
= gen_frame_mem (word_mode
, hard_frame_pointer_rtx
);
9553 rtx frame_reg
= gen_rtx_REG (word_mode
,
9554 HARD_FRAME_POINTER_REGNUM
);
9555 insn
= emit_move_insn (frame_reg
, t
);
9557 /* Note that we use SA as a temporary CFA, as the return
9558 address is at the proper place relative to it. We
9559 pretend this happens at the FP restore insn because
9560 prior to this insn the FP would be stored at the wrong
9561 offset relative to SA, and after this insn we have no
9562 other reasonable register to use for the CFA. We don't
9563 bother resetting the CFA to the SP for the duration of
9564 the return insn, unless the control flow instrumentation
9565 is done. In this case the SP is used later and we have
9566 to reset CFA to SP. */
9567 add_reg_note (insn
, REG_CFA_DEF_CFA
,
9568 plus_constant (Pmode
, sa
, UNITS_PER_WORD
));
9569 ix86_add_queued_cfa_restore_notes (insn
);
9570 add_reg_note (insn
, REG_CFA_RESTORE
, frame_reg
);
9571 RTX_FRAME_RELATED_P (insn
) = 1;
9574 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
9575 m
->fs
.fp_valid
= false;
9577 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
9579 flag_cf_protection
);
9583 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
9584 t
= plus_constant (Pmode
, t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
9585 insn
= emit_insn (gen_rtx_SET (stack_pointer_rtx
, t
));
9586 ix86_add_queued_cfa_restore_notes (insn
);
9588 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
9589 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
9591 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
9592 add_reg_note (insn
, REG_CFA_DEF_CFA
,
9593 plus_constant (Pmode
, stack_pointer_rtx
,
9595 RTX_FRAME_RELATED_P (insn
) = 1;
9598 m
->fs
.sp_offset
= UNITS_PER_WORD
;
9599 m
->fs
.sp_valid
= true;
9600 m
->fs
.sp_realigned
= false;
9605 /* SEH requires that the function end with (1) a stack adjustment
9606 if necessary, (2) a sequence of pops, and (3) a return or
9607 jump instruction. Prevent insns from the function body from
9608 being scheduled into this sequence. */
9611 /* Prevent a catch region from being adjacent to the standard
9612 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
9613 nor several other flags that would be interesting to test are
9615 if (flag_non_call_exceptions
)
9616 emit_insn (gen_nops (const1_rtx
));
9618 emit_insn (gen_blockage ());
9621 /* First step is to deallocate the stack frame so that we can
9622 pop the registers. If the stack pointer was realigned, it needs
9623 to be restored now. Also do it on SEH target for very large
9624 frame as the emitted instructions aren't allowed by the ABI
9626 if (!m
->fs
.sp_valid
|| m
->fs
.sp_realigned
9628 && (m
->fs
.sp_offset
- reg_save_offset
9629 >= SEH_MAX_FRAME_SIZE
)))
9631 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
9632 GEN_INT (m
->fs
.fp_offset
9636 else if (m
->fs
.sp_offset
!= reg_save_offset
)
9638 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
9639 GEN_INT (m
->fs
.sp_offset
9642 m
->fs
.cfa_reg
== stack_pointer_rtx
);
9645 ix86_emit_restore_regs_using_pop ();
9648 /* If we used a stack pointer and haven't already got rid of it,
9652 /* If the stack pointer is valid and pointing at the frame
9653 pointer store address, then we only need a pop. */
9654 if (sp_valid_at (frame
.hfp_save_offset
)
9655 && m
->fs
.sp_offset
== frame
.hfp_save_offset
)
9656 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
9657 /* Leave results in shorter dependency chains on CPUs that are
9658 able to grok it fast. */
9659 else if (TARGET_USE_LEAVE
9660 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun
))
9661 || !cfun
->machine
->use_fast_prologue_epilogue
)
9662 ix86_emit_leave (NULL
);
9665 pro_epilogue_adjust_stack (stack_pointer_rtx
,
9666 hard_frame_pointer_rtx
,
9667 const0_rtx
, style
, !using_drap
);
9668 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
9674 int param_ptr_offset
= UNITS_PER_WORD
;
9677 gcc_assert (stack_realign_drap
);
9679 if (ix86_static_chain_on_stack
)
9680 param_ptr_offset
+= UNITS_PER_WORD
;
9681 if (!call_used_or_fixed_reg_p (REGNO (crtl
->drap_reg
)))
9682 param_ptr_offset
+= UNITS_PER_WORD
;
9684 insn
= emit_insn (gen_rtx_SET
9686 plus_constant (Pmode
, crtl
->drap_reg
,
9687 -param_ptr_offset
)));
9688 m
->fs
.cfa_reg
= stack_pointer_rtx
;
9689 m
->fs
.cfa_offset
= param_ptr_offset
;
9690 m
->fs
.sp_offset
= param_ptr_offset
;
9691 m
->fs
.realigned
= false;
9693 add_reg_note (insn
, REG_CFA_DEF_CFA
,
9694 plus_constant (Pmode
, stack_pointer_rtx
,
9696 RTX_FRAME_RELATED_P (insn
) = 1;
9698 if (!call_used_or_fixed_reg_p (REGNO (crtl
->drap_reg
)))
9699 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
9702 /* At this point the stack pointer must be valid, and we must have
9703 restored all of the registers. We may not have deallocated the
9704 entire stack frame. We've delayed this until now because it may
9705 be possible to merge the local stack deallocation with the
9706 deallocation forced by ix86_static_chain_on_stack. */
9707 gcc_assert (m
->fs
.sp_valid
);
9708 gcc_assert (!m
->fs
.sp_realigned
);
9709 gcc_assert (!m
->fs
.fp_valid
);
9710 gcc_assert (!m
->fs
.realigned
);
9711 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
9713 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
9714 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
9718 ix86_add_queued_cfa_restore_notes (get_last_insn ());
9720 /* Sibcall epilogues don't want a return instruction. */
9723 m
->fs
= frame_state_save
;
9727 if (cfun
->machine
->func_type
!= TYPE_NORMAL
)
9728 emit_jump_insn (gen_interrupt_return ());
9729 else if (crtl
->args
.pops_args
&& crtl
->args
.size
)
9731 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
9733 /* i386 can only pop 64K bytes. If asked to pop more, pop return
9734 address, do explicit add, and jump indirectly to the caller. */
9736 if (crtl
->args
.pops_args
>= 65536)
9738 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
9741 /* There is no "pascal" calling convention in any 64bit ABI. */
9742 gcc_assert (!TARGET_64BIT
);
9744 insn
= emit_insn (gen_pop (ecx
));
9745 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
9746 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
9748 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
9749 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
9750 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
9751 add_reg_note (insn
, REG_CFA_REGISTER
, gen_rtx_SET (ecx
, pc_rtx
));
9752 RTX_FRAME_RELATED_P (insn
) = 1;
9754 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
9756 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
9759 emit_jump_insn (gen_simple_return_pop_internal (popc
));
9761 else if (!m
->call_ms2sysv
|| !restore_stub_is_tail
)
9763 /* In case of return from EH a simple return cannot be used
9764 as a return address will be compared with a shadow stack
9765 return address. Use indirect jump instead. */
9766 if (style
== 2 && flag_cf_protection
)
9768 /* Register used in indirect jump must be in word_mode. But
9769 Pmode may not be the same as word_mode for x32. */
9770 rtx ecx
= gen_rtx_REG (word_mode
, CX_REG
);
9773 insn
= emit_insn (gen_pop (ecx
));
9774 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
9775 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
9777 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
9778 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
9779 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
9780 add_reg_note (insn
, REG_CFA_REGISTER
, gen_rtx_SET (ecx
, pc_rtx
));
9781 RTX_FRAME_RELATED_P (insn
) = 1;
9783 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
9786 emit_jump_insn (gen_simple_return_internal ());
9789 /* Restore the state back to the state from the prologue,
9790 so that it's correct for the next epilogue. */
9791 m
->fs
= frame_state_save
;
9794 /* Reset from the function's potential modifications. */
9797 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
)
9799 if (pic_offset_table_rtx
9800 && !ix86_use_pseudo_pic_reg ())
9801 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
9805 rtx_insn
*insn
= get_last_insn ();
9806 rtx_insn
*deleted_debug_label
= NULL
;
9808 /* Mach-O doesn't support labels at the end of objects, so if
9809 it looks like we might want one, take special action.
9810 First, collect any sequence of deleted debug labels. */
9813 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
9815 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
9816 notes only, instead set their CODE_LABEL_NUMBER to -1,
9817 otherwise there would be code generation differences
9818 in between -g and -g0. */
9819 if (NOTE_P (insn
) && NOTE_KIND (insn
)
9820 == NOTE_INSN_DELETED_DEBUG_LABEL
)
9821 deleted_debug_label
= insn
;
9822 insn
= PREV_INSN (insn
);
9828 then this needs to be detected, so skip past the barrier. */
9830 if (insn
&& BARRIER_P (insn
))
9831 insn
= PREV_INSN (insn
);
9833 /* Up to now we've only seen notes or barriers. */
9838 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
))
9839 /* Trailing label. */
9840 fputs ("\tnop\n", file
);
9841 else if (cfun
&& ! cfun
->is_thunk
)
9843 /* See if we have a completely empty function body, skipping
9844 the special case of the picbase thunk emitted as asm. */
9845 while (insn
&& ! INSN_P (insn
))
9846 insn
= PREV_INSN (insn
);
9847 /* If we don't find any insns, we've got an empty function body;
9848 I.e. completely empty - without a return or branch. This is
9849 taken as the case where a function body has been removed
9850 because it contains an inline __builtin_unreachable(). GCC
9851 declares that reaching __builtin_unreachable() means UB so
9852 we're not obliged to do anything special; however, we want
9853 non-zero-sized function bodies. To meet this, and help the
9854 user out, let's trap the case. */
9856 fputs ("\tud2\n", file
);
9859 else if (deleted_debug_label
)
9860 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
9861 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
9862 CODE_LABEL_NUMBER (insn
) = -1;
9866 /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */
9869 ix86_print_patchable_function_entry (FILE *file
,
9870 unsigned HOST_WIDE_INT patch_area_size
,
9873 if (cfun
->machine
->function_label_emitted
)
9875 /* NB: When ix86_print_patchable_function_entry is called after
9876 function table has been emitted, we have inserted or queued
9877 a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
9878 place. There is nothing to do here. */
9882 default_print_patchable_function_entry (file
, patch_area_size
,
9886 /* Output patchable area. NB: default_print_patchable_function_entry
9887 isn't available in i386.md. */
9890 ix86_output_patchable_area (unsigned int patch_area_size
,
9893 default_print_patchable_function_entry (asm_out_file
,
9898 /* Return a scratch register to use in the split stack prologue. The
9899 split stack prologue is used for -fsplit-stack. It is the first
9900 instructions in the function, even before the regular prologue.
9901 The scratch register can be any caller-saved register which is not
9902 used for parameters or for the static chain. */
9905 split_stack_prologue_scratch_regno (void)
9911 bool is_fastcall
, is_thiscall
;
9914 is_fastcall
= (lookup_attribute ("fastcall",
9915 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
9917 is_thiscall
= (lookup_attribute ("thiscall",
9918 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
9920 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
9924 if (DECL_STATIC_CHAIN (cfun
->decl
))
9926 sorry ("%<-fsplit-stack%> does not support fastcall with "
9928 return INVALID_REGNUM
;
9932 else if (is_thiscall
)
9934 if (!DECL_STATIC_CHAIN (cfun
->decl
))
9938 else if (regparm
< 3)
9940 if (!DECL_STATIC_CHAIN (cfun
->decl
))
9946 sorry ("%<-fsplit-stack%> does not support 2 register "
9947 "parameters for a nested function");
9948 return INVALID_REGNUM
;
9955 /* FIXME: We could make this work by pushing a register
9956 around the addition and comparison. */
9957 sorry ("%<-fsplit-stack%> does not support 3 register parameters");
9958 return INVALID_REGNUM
;
9963 /* A SYMBOL_REF for the function which allocates new stackspace for
9966 static GTY(()) rtx split_stack_fn
;
9968 /* A SYMBOL_REF for the more stack function when using the large
9971 static GTY(()) rtx split_stack_fn_large
;
9973 /* Return location of the stack guard value in the TLS block. */
9976 ix86_split_stack_guard (void)
9979 addr_space_t as
= DEFAULT_TLS_SEG_REG
;
9982 gcc_assert (flag_split_stack
);
9984 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
9985 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
9990 r
= GEN_INT (offset
);
9991 r
= gen_const_mem (Pmode
, r
);
9992 set_mem_addr_space (r
, as
);
9997 /* Handle -fsplit-stack. These are the first instructions in the
9998 function, even before the regular prologue. */
10001 ix86_expand_split_stack_prologue (void)
10003 HOST_WIDE_INT allocate
;
10004 unsigned HOST_WIDE_INT args_size
;
10005 rtx_code_label
*label
;
10006 rtx limit
, current
, allocate_rtx
, call_fusage
;
10007 rtx_insn
*call_insn
;
10008 rtx scratch_reg
= NULL_RTX
;
10009 rtx_code_label
*varargs_label
= NULL
;
10012 gcc_assert (flag_split_stack
&& reload_completed
);
10014 ix86_finalize_stack_frame_flags ();
10015 struct ix86_frame
&frame
= cfun
->machine
->frame
;
10016 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
10018 /* This is the label we will branch to if we have enough stack
10019 space. We expect the basic block reordering pass to reverse this
10020 branch if optimizing, so that we branch in the unlikely case. */
10021 label
= gen_label_rtx ();
10023 /* We need to compare the stack pointer minus the frame size with
10024 the stack boundary in the TCB. The stack boundary always gives
10025 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10026 can compare directly. Otherwise we need to do an addition. */
10028 limit
= ix86_split_stack_guard ();
10030 if (allocate
< SPLIT_STACK_AVAILABLE
)
10031 current
= stack_pointer_rtx
;
10034 unsigned int scratch_regno
;
10037 /* We need a scratch register to hold the stack pointer minus
10038 the required frame size. Since this is the very start of the
10039 function, the scratch register can be any caller-saved
10040 register which is not used for parameters. */
10041 offset
= GEN_INT (- allocate
);
10042 scratch_regno
= split_stack_prologue_scratch_regno ();
10043 if (scratch_regno
== INVALID_REGNUM
)
10045 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
10046 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
10048 /* We don't use gen_add in this case because it will
10049 want to split to lea, but when not optimizing the insn
10050 will not be split after this point. */
10051 emit_insn (gen_rtx_SET (scratch_reg
,
10052 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
10057 emit_move_insn (scratch_reg
, offset
);
10058 emit_insn (gen_add2_insn (scratch_reg
, stack_pointer_rtx
));
10060 current
= scratch_reg
;
10063 ix86_expand_branch (GEU
, current
, limit
, label
);
10064 rtx_insn
*jump_insn
= get_last_insn ();
10065 JUMP_LABEL (jump_insn
) = label
;
10067 /* Mark the jump as very likely to be taken. */
10068 add_reg_br_prob_note (jump_insn
, profile_probability::very_likely ());
10070 if (split_stack_fn
== NULL_RTX
)
10072 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
10073 SYMBOL_REF_FLAGS (split_stack_fn
) |= SYMBOL_FLAG_LOCAL
;
10075 fn
= split_stack_fn
;
10077 /* Get more stack space. We pass in the desired stack space and the
10078 size of the arguments to copy to the new stack. In 32-bit mode
10079 we push the parameters; __morestack will return on a new stack
10080 anyhow. In 64-bit mode we pass the parameters in r10 and
10082 allocate_rtx
= GEN_INT (allocate
);
10083 args_size
= crtl
->args
.size
>= 0 ? (HOST_WIDE_INT
) crtl
->args
.size
: 0;
10084 call_fusage
= NULL_RTX
;
10085 rtx pop
= NULL_RTX
;
10090 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
10091 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
10093 /* If this function uses a static chain, it will be in %r10.
10094 Preserve it across the call to __morestack. */
10095 if (DECL_STATIC_CHAIN (cfun
->decl
))
10099 rax
= gen_rtx_REG (word_mode
, AX_REG
);
10100 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
10101 use_reg (&call_fusage
, rax
);
10104 if ((ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
10107 HOST_WIDE_INT argval
;
10109 gcc_assert (Pmode
== DImode
);
10110 /* When using the large model we need to load the address
10111 into a register, and we've run out of registers. So we
10112 switch to a different calling convention, and we call a
10113 different function: __morestack_large. We pass the
10114 argument size in the upper 32 bits of r10 and pass the
10115 frame size in the lower 32 bits. */
10116 gcc_assert ((allocate
& HOST_WIDE_INT_C (0xffffffff)) == allocate
);
10117 gcc_assert ((args_size
& 0xffffffff) == args_size
);
10119 if (split_stack_fn_large
== NULL_RTX
)
10121 split_stack_fn_large
10122 = gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
10123 SYMBOL_REF_FLAGS (split_stack_fn_large
) |= SYMBOL_FLAG_LOCAL
;
10125 if (ix86_cmodel
== CM_LARGE_PIC
)
10127 rtx_code_label
*label
;
10130 label
= gen_label_rtx ();
10131 emit_label (label
);
10132 LABEL_PRESERVE_P (label
) = 1;
10133 emit_insn (gen_set_rip_rex64 (reg10
, label
));
10134 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
10135 emit_insn (gen_add2_insn (reg10
, reg11
));
10136 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
10138 x
= gen_rtx_CONST (Pmode
, x
);
10139 emit_move_insn (reg11
, x
);
10140 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
10141 x
= gen_const_mem (Pmode
, x
);
10142 emit_move_insn (reg11
, x
);
10145 emit_move_insn (reg11
, split_stack_fn_large
);
10149 argval
= ((args_size
<< 16) << 16) + allocate
;
10150 emit_move_insn (reg10
, GEN_INT (argval
));
10154 emit_move_insn (reg10
, allocate_rtx
);
10155 emit_move_insn (reg11
, GEN_INT (args_size
));
10156 use_reg (&call_fusage
, reg11
);
10159 use_reg (&call_fusage
, reg10
);
10163 rtx_insn
*insn
= emit_insn (gen_push (GEN_INT (args_size
)));
10164 add_reg_note (insn
, REG_ARGS_SIZE
, GEN_INT (UNITS_PER_WORD
));
10165 insn
= emit_insn (gen_push (allocate_rtx
));
10166 add_reg_note (insn
, REG_ARGS_SIZE
, GEN_INT (2 * UNITS_PER_WORD
));
10167 pop
= GEN_INT (2 * UNITS_PER_WORD
);
10169 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
10170 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
10172 add_function_usage_to (call_insn
, call_fusage
);
10174 add_reg_note (call_insn
, REG_ARGS_SIZE
, GEN_INT (0));
10175 /* Indicate that this function can't jump to non-local gotos. */
10176 make_reg_eh_region_note_nothrow_nononlocal (call_insn
);
10178 /* In order to make call/return prediction work right, we now need
10179 to execute a return instruction. See
10180 libgcc/config/i386/morestack.S for the details on how this works.
10182 For flow purposes gcc must not see this as a return
10183 instruction--we need control flow to continue at the subsequent
10184 label. Therefore, we use an unspec. */
10185 gcc_assert (crtl
->args
.pops_args
< 65536);
10187 = emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
10189 if ((flag_cf_protection
& CF_BRANCH
))
10191 /* Insert ENDBR since __morestack will jump back here via indirect
10193 rtx cet_eb
= gen_nop_endbr ();
10194 emit_insn_after (cet_eb
, ret_insn
);
10197 /* If we are in 64-bit mode and this function uses a static chain,
10198 we saved %r10 in %rax before calling _morestack. */
10199 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
10200 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
10201 gen_rtx_REG (word_mode
, AX_REG
));
10203 /* If this function calls va_start, we need to store a pointer to
10204 the arguments on the old stack, because they may not have been
10205 all copied to the new stack. At this point the old stack can be
10206 found at the frame pointer value used by __morestack, because
10207 __morestack has set that up before calling back to us. Here we
10208 store that pointer in a scratch register, and in
10209 ix86_expand_prologue we store the scratch register in a stack
10211 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
10213 unsigned int scratch_regno
;
10217 scratch_regno
= split_stack_prologue_scratch_regno ();
10218 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
10219 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
10223 return address within this function
10224 return address of caller of this function
10226 So we add three words to get to the stack arguments.
10230 return address within this function
10231 first argument to __morestack
10232 second argument to __morestack
10233 return address of caller of this function
10235 So we add five words to get to the stack arguments.
10237 words
= TARGET_64BIT
? 3 : 5;
10238 emit_insn (gen_rtx_SET (scratch_reg
,
10239 plus_constant (Pmode
, frame_reg
,
10240 words
* UNITS_PER_WORD
)));
10242 varargs_label
= gen_label_rtx ();
10243 emit_jump_insn (gen_jump (varargs_label
));
10244 JUMP_LABEL (get_last_insn ()) = varargs_label
;
10249 emit_label (label
);
10250 LABEL_NUSES (label
) = 1;
10252 /* If this function calls va_start, we now have to set the scratch
10253 register for the case where we do not call __morestack. In this
10254 case we need to set it based on the stack pointer. */
10255 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
10257 emit_insn (gen_rtx_SET (scratch_reg
,
10258 plus_constant (Pmode
, stack_pointer_rtx
,
10261 emit_label (varargs_label
);
10262 LABEL_NUSES (varargs_label
) = 1;
10266 /* We may have to tell the dataflow pass that the split stack prologue
10267 is initializing a scratch register. */
10270 ix86_live_on_entry (bitmap regs
)
10272 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
10274 gcc_assert (flag_split_stack
);
10275 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
10279 /* Extract the parts of an RTL expression that is a valid memory address
10280 for an instruction. Return false if the structure of the address is
10284 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
10286 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
10287 rtx base_reg
, index_reg
;
10288 HOST_WIDE_INT scale
= 1;
10289 rtx scale_rtx
= NULL_RTX
;
10291 addr_space_t seg
= ADDR_SPACE_GENERIC
;
10293 /* Allow zero-extended SImode addresses,
10294 they will be emitted with addr32 prefix. */
10295 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
10297 if (GET_CODE (addr
) == ZERO_EXTEND
10298 && GET_MODE (XEXP (addr
, 0)) == SImode
)
10300 addr
= XEXP (addr
, 0);
10301 if (CONST_INT_P (addr
))
10304 else if (GET_CODE (addr
) == AND
10305 && const_32bit_mask (XEXP (addr
, 1), DImode
))
10307 addr
= lowpart_subreg (SImode
, XEXP (addr
, 0), DImode
);
10308 if (addr
== NULL_RTX
)
10311 if (CONST_INT_P (addr
))
10314 else if (GET_CODE (addr
) == AND
)
10316 /* For ASHIFT inside AND, combine will not generate
10317 canonical zero-extend. Merge mask for AND and shift_count
10318 to check if it is canonical zero-extend. */
10319 tmp
= XEXP (addr
, 0);
10320 rtx mask
= XEXP (addr
, 1);
10321 if (tmp
&& GET_CODE(tmp
) == ASHIFT
)
10323 rtx shift_val
= XEXP (tmp
, 1);
10324 if (CONST_INT_P (mask
) && CONST_INT_P (shift_val
)
10325 && (((unsigned HOST_WIDE_INT
) INTVAL(mask
)
10326 | ((HOST_WIDE_INT_1U
<< INTVAL(shift_val
)) - 1))
10329 addr
= lowpart_subreg (SImode
, XEXP (addr
, 0),
10337 /* Allow SImode subregs of DImode addresses,
10338 they will be emitted with addr32 prefix. */
10339 if (TARGET_64BIT
&& GET_MODE (addr
) == SImode
)
10341 if (SUBREG_P (addr
)
10342 && GET_MODE (SUBREG_REG (addr
)) == DImode
)
10344 addr
= SUBREG_REG (addr
);
10345 if (CONST_INT_P (addr
))
10352 else if (SUBREG_P (addr
))
10354 if (REG_P (SUBREG_REG (addr
)))
10359 else if (GET_CODE (addr
) == PLUS
)
10361 rtx addends
[4], op
;
10369 addends
[n
++] = XEXP (op
, 1);
10372 while (GET_CODE (op
) == PLUS
);
10377 for (i
= n
; i
>= 0; --i
)
10380 switch (GET_CODE (op
))
10385 index
= XEXP (op
, 0);
10386 scale_rtx
= XEXP (op
, 1);
10392 index
= XEXP (op
, 0);
10393 tmp
= XEXP (op
, 1);
10394 if (!CONST_INT_P (tmp
))
10396 scale
= INTVAL (tmp
);
10397 if ((unsigned HOST_WIDE_INT
) scale
> 3)
10399 scale
= 1 << scale
;
10404 if (GET_CODE (op
) != UNSPEC
)
10409 if (XINT (op
, 1) == UNSPEC_TP
10410 && TARGET_TLS_DIRECT_SEG_REFS
10411 && seg
== ADDR_SPACE_GENERIC
)
10412 seg
= DEFAULT_TLS_SEG_REG
;
10418 if (!REG_P (SUBREG_REG (op
)))
10445 else if (GET_CODE (addr
) == MULT
)
10447 index
= XEXP (addr
, 0); /* index*scale */
10448 scale_rtx
= XEXP (addr
, 1);
10450 else if (GET_CODE (addr
) == ASHIFT
)
10452 /* We're called for lea too, which implements ashift on occasion. */
10453 index
= XEXP (addr
, 0);
10454 tmp
= XEXP (addr
, 1);
10455 if (!CONST_INT_P (tmp
))
10457 scale
= INTVAL (tmp
);
10458 if ((unsigned HOST_WIDE_INT
) scale
> 3)
10460 scale
= 1 << scale
;
10463 disp
= addr
; /* displacement */
10469 else if (SUBREG_P (index
)
10470 && REG_P (SUBREG_REG (index
)))
10476 /* Extract the integral value of scale. */
10479 if (!CONST_INT_P (scale_rtx
))
10481 scale
= INTVAL (scale_rtx
);
10484 base_reg
= base
&& SUBREG_P (base
) ? SUBREG_REG (base
) : base
;
10485 index_reg
= index
&& SUBREG_P (index
) ? SUBREG_REG (index
) : index
;
10487 /* Avoid useless 0 displacement. */
10488 if (disp
== const0_rtx
&& (base
|| index
))
10491 /* Allow arg pointer and stack pointer as index if there is not scaling. */
10492 if (base_reg
&& index_reg
&& scale
== 1
10493 && (REGNO (index_reg
) == ARG_POINTER_REGNUM
10494 || REGNO (index_reg
) == FRAME_POINTER_REGNUM
10495 || REGNO (index_reg
) == SP_REG
))
10497 std::swap (base
, index
);
10498 std::swap (base_reg
, index_reg
);
10501 /* Special case: %ebp cannot be encoded as a base without a displacement.
10503 if (!disp
&& base_reg
10504 && (REGNO (base_reg
) == ARG_POINTER_REGNUM
10505 || REGNO (base_reg
) == FRAME_POINTER_REGNUM
10506 || REGNO (base_reg
) == BP_REG
10507 || REGNO (base_reg
) == R13_REG
))
10510 /* Special case: on K6, [%esi] makes the instruction vector decoded.
10511 Avoid this by transforming to [%esi+0].
10512 Reload calls address legitimization without cfun defined, so we need
10513 to test cfun for being non-NULL. */
10514 if (TARGET_CPU_P (K6
) && cfun
&& optimize_function_for_speed_p (cfun
)
10515 && base_reg
&& !index_reg
&& !disp
10516 && REGNO (base_reg
) == SI_REG
)
10519 /* Special case: encode reg+reg instead of reg*2. */
10520 if (!base
&& index
&& scale
== 2)
10521 base
= index
, base_reg
= index_reg
, scale
= 1;
10523 /* Special case: scaling cannot be encoded without base or displacement. */
10524 if (!base
&& !disp
&& index
&& scale
!= 1)
10528 out
->index
= index
;
10530 out
->scale
= scale
;
10536 /* Return cost of the memory address x.
10537 For i386, it is better to use a complex address than let gcc copy
10538 the address into a reg and make a new pseudo. But not if the address
10539 requires to two regs - that would mean more pseudos with longer
10542 ix86_address_cost (rtx x
, machine_mode
, addr_space_t
, bool)
10544 struct ix86_address parts
;
10546 int ok
= ix86_decompose_address (x
, &parts
);
10550 if (parts
.base
&& SUBREG_P (parts
.base
))
10551 parts
.base
= SUBREG_REG (parts
.base
);
10552 if (parts
.index
&& SUBREG_P (parts
.index
))
10553 parts
.index
= SUBREG_REG (parts
.index
);
10555 /* Attempt to minimize number of registers in the address by increasing
10556 address cost for each used register. We don't increase address cost
10557 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
10558 is not invariant itself it most likely means that base or index is not
10559 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
10560 which is not profitable for x86. */
10562 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
10563 && (current_pass
->type
== GIMPLE_PASS
10564 || !pic_offset_table_rtx
10565 || !REG_P (parts
.base
)
10566 || REGNO (pic_offset_table_rtx
) != REGNO (parts
.base
)))
10570 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
10571 && (current_pass
->type
== GIMPLE_PASS
10572 || !pic_offset_table_rtx
10573 || !REG_P (parts
.index
)
10574 || REGNO (pic_offset_table_rtx
) != REGNO (parts
.index
)))
10577 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
10578 since it's predecode logic can't detect the length of instructions
10579 and it degenerates to vector decoded. Increase cost of such
10580 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
10581 to split such addresses or even refuse such addresses at all.
10583 Following addressing modes are affected:
10588 The first and last case may be avoidable by explicitly coding the zero in
10589 memory address, but I don't have AMD-K6 machine handy to check this
10592 if (TARGET_CPU_P (K6
)
10593 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
10594 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
10595 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
10601 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10602 this is used for to form addresses to local data when -fPIC is in
10606 darwin_local_data_pic (rtx disp
)
10608 return (GET_CODE (disp
) == UNSPEC
10609 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
10612 /* True if the function symbol operand X should be loaded from GOT.
10613 If CALL_P is true, X is a call operand.
10615 NB: -mno-direct-extern-access doesn't force load from GOT for
10618 NB: In 32-bit mode, only non-PIC is allowed in inline assembly
10619 statements, since a PIC register could not be available at the
10623 ix86_force_load_from_GOT_p (rtx x
, bool call_p
)
10625 return ((TARGET_64BIT
|| (!flag_pic
&& HAVE_AS_IX86_GOT32X
))
10626 && !TARGET_PECOFF
&& !TARGET_MACHO
10627 && (!flag_pic
|| this_is_asm_operands
)
10628 && ix86_cmodel
!= CM_LARGE
10629 && ix86_cmodel
!= CM_LARGE_PIC
10630 && GET_CODE (x
) == SYMBOL_REF
10632 && (!ix86_direct_extern_access
10633 || (SYMBOL_REF_DECL (x
)
10634 && lookup_attribute ("nodirect_extern_access",
10635 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x
))))))
10636 || (SYMBOL_REF_FUNCTION_P (x
)
10638 || (SYMBOL_REF_DECL (x
)
10639 && lookup_attribute ("noplt",
10640 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x
)))))))
10641 && !SYMBOL_REF_LOCAL_P (x
));
10644 /* Determine if a given RTX is a valid constant. We already know this
10645 satisfies CONSTANT_P. */
10648 ix86_legitimate_constant_p (machine_mode mode
, rtx x
)
10650 switch (GET_CODE (x
))
10655 if (GET_CODE (x
) == PLUS
)
10657 if (!CONST_INT_P (XEXP (x
, 1)))
10662 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
10665 /* Only some unspecs are valid as "constants". */
10666 if (GET_CODE (x
) == UNSPEC
)
10667 switch (XINT (x
, 1))
10670 case UNSPEC_GOTOFF
:
10671 case UNSPEC_PLTOFF
:
10672 return TARGET_64BIT
;
10674 case UNSPEC_NTPOFF
:
10675 x
= XVECEXP (x
, 0, 0);
10676 return (GET_CODE (x
) == SYMBOL_REF
10677 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
10678 case UNSPEC_DTPOFF
:
10679 x
= XVECEXP (x
, 0, 0);
10680 return (GET_CODE (x
) == SYMBOL_REF
10681 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
10686 /* We must have drilled down to a symbol. */
10687 if (GET_CODE (x
) == LABEL_REF
)
10689 if (GET_CODE (x
) != SYMBOL_REF
)
10694 /* TLS symbols are never valid. */
10695 if (SYMBOL_REF_TLS_MODEL (x
))
10698 /* DLLIMPORT symbols are never valid. */
10699 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10700 && SYMBOL_REF_DLLIMPORT_P (x
))
10704 /* mdynamic-no-pic */
10705 if (MACHO_DYNAMIC_NO_PIC_P
)
10706 return machopic_symbol_defined_p (x
);
10709 /* External function address should be loaded
10710 via the GOT slot to avoid PLT. */
10711 if (ix86_force_load_from_GOT_p (x
))
10716 CASE_CONST_SCALAR_INT
:
10717 if (ix86_endbr_immediate_operand (x
, VOIDmode
))
10728 if (!standard_sse_constant_p (x
, mode
)
10729 && GET_MODE_SIZE (TARGET_AVX512F
10734 ? TImode
: DImode
))) < GET_MODE_SIZE (mode
))
10742 if (!standard_sse_constant_p (x
, mode
))
10747 if (mode
== E_BFmode
)
10754 /* Otherwise we handle everything else in the move patterns. */
10758 /* Determine if it's legal to put X into the constant pool. This
10759 is not possible for the address of thread-local symbols, which
10760 is checked above. */
10763 ix86_cannot_force_const_mem (machine_mode mode
, rtx x
)
10765 /* We can put any immediate constant in memory. */
10766 switch (GET_CODE (x
))
10775 return !ix86_legitimate_constant_p (mode
, x
);
10778 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
10782 is_imported_p (rtx x
)
10784 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
10785 || GET_CODE (x
) != SYMBOL_REF
)
10788 return SYMBOL_REF_DLLIMPORT_P (x
) || SYMBOL_REF_STUBVAR_P (x
);
10792 /* Nonzero if the constant value X is a legitimate general operand
10793 when generating PIC code. It is given that flag_pic is on and
10794 that X satisfies CONSTANT_P. */
10797 legitimate_pic_operand_p (rtx x
)
10801 switch (GET_CODE (x
))
10804 inner
= XEXP (x
, 0);
10805 if (GET_CODE (inner
) == PLUS
10806 && CONST_INT_P (XEXP (inner
, 1)))
10807 inner
= XEXP (inner
, 0);
10809 /* Only some unspecs are valid as "constants". */
10810 if (GET_CODE (inner
) == UNSPEC
)
10811 switch (XINT (inner
, 1))
10814 case UNSPEC_GOTOFF
:
10815 case UNSPEC_PLTOFF
:
10816 return TARGET_64BIT
;
10818 x
= XVECEXP (inner
, 0, 0);
10819 return (GET_CODE (x
) == SYMBOL_REF
10820 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
10821 case UNSPEC_MACHOPIC_OFFSET
:
10822 return legitimate_pic_address_disp_p (x
);
10830 return legitimate_pic_address_disp_p (x
);
10837 /* Determine if a given CONST RTX is a valid memory displacement
10841 legitimate_pic_address_disp_p (rtx disp
)
10845 /* In 64bit mode we can allow direct addresses of symbols and labels
10846 when they are not dynamic symbols. */
10849 rtx op0
= disp
, op1
;
10851 switch (GET_CODE (disp
))
10857 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
10859 op0
= XEXP (XEXP (disp
, 0), 0);
10860 op1
= XEXP (XEXP (disp
, 0), 1);
10861 if (!CONST_INT_P (op1
))
10863 if (GET_CODE (op0
) == UNSPEC
10864 && (XINT (op0
, 1) == UNSPEC_DTPOFF
10865 || XINT (op0
, 1) == UNSPEC_NTPOFF
)
10866 && trunc_int_for_mode (INTVAL (op1
), SImode
) == INTVAL (op1
))
10868 if (INTVAL (op1
) >= 16*1024*1024
10869 || INTVAL (op1
) < -16*1024*1024)
10871 if (GET_CODE (op0
) == LABEL_REF
)
10873 if (GET_CODE (op0
) == CONST
10874 && GET_CODE (XEXP (op0
, 0)) == UNSPEC
10875 && XINT (XEXP (op0
, 0), 1) == UNSPEC_PCREL
)
10877 if (GET_CODE (op0
) == UNSPEC
10878 && XINT (op0
, 1) == UNSPEC_PCREL
)
10880 if (GET_CODE (op0
) != SYMBOL_REF
)
10885 /* TLS references should always be enclosed in UNSPEC.
10886 The dllimported symbol needs always to be resolved. */
10887 if (SYMBOL_REF_TLS_MODEL (op0
)
10888 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& SYMBOL_REF_DLLIMPORT_P (op0
)))
10893 if (is_imported_p (op0
))
10896 if (SYMBOL_REF_FAR_ADDR_P (op0
) || !SYMBOL_REF_LOCAL_P (op0
))
10899 /* Non-external-weak function symbols need to be resolved only
10900 for the large model. Non-external symbols don't need to be
10901 resolved for large and medium models. For the small model,
10902 we don't need to resolve anything here. */
10903 if ((ix86_cmodel
!= CM_LARGE_PIC
10904 && SYMBOL_REF_FUNCTION_P (op0
)
10905 && !(SYMBOL_REF_EXTERNAL_P (op0
) && SYMBOL_REF_WEAK (op0
)))
10906 || !SYMBOL_REF_EXTERNAL_P (op0
)
10907 || ix86_cmodel
== CM_SMALL_PIC
)
10910 else if (!SYMBOL_REF_FAR_ADDR_P (op0
)
10911 && (SYMBOL_REF_LOCAL_P (op0
)
10912 || ((ix86_direct_extern_access
10913 && !(SYMBOL_REF_DECL (op0
)
10914 && lookup_attribute ("nodirect_extern_access",
10915 DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0
)))))
10916 && HAVE_LD_PIE_COPYRELOC
10918 && !SYMBOL_REF_WEAK (op0
)
10919 && !SYMBOL_REF_FUNCTION_P (op0
)))
10920 && ix86_cmodel
!= CM_LARGE_PIC
)
10928 if (GET_CODE (disp
) != CONST
)
10930 disp
= XEXP (disp
, 0);
10934 /* We are unsafe to allow PLUS expressions. This limit allowed distance
10935 of GOT tables. We should not need these anyway. */
10936 if (GET_CODE (disp
) != UNSPEC
10937 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
10938 && XINT (disp
, 1) != UNSPEC_GOTOFF
10939 && XINT (disp
, 1) != UNSPEC_PCREL
10940 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
10943 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
10944 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
10950 if (GET_CODE (disp
) == PLUS
)
10952 if (!CONST_INT_P (XEXP (disp
, 1)))
10954 disp
= XEXP (disp
, 0);
10958 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
10961 if (GET_CODE (disp
) != UNSPEC
)
10964 switch (XINT (disp
, 1))
10969 /* We need to check for both symbols and labels because VxWorks loads
10970 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10972 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
10973 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
10974 case UNSPEC_GOTOFF
:
10975 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10976 While ABI specify also 32bit relocation but we don't produce it in
10977 small PIC model at all. */
10978 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
10979 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
10981 return !TARGET_PECOFF
&& gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
10983 case UNSPEC_GOTTPOFF
:
10984 case UNSPEC_GOTNTPOFF
:
10985 case UNSPEC_INDNTPOFF
:
10988 disp
= XVECEXP (disp
, 0, 0);
10989 return (GET_CODE (disp
) == SYMBOL_REF
10990 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
10991 case UNSPEC_NTPOFF
:
10992 disp
= XVECEXP (disp
, 0, 0);
10993 return (GET_CODE (disp
) == SYMBOL_REF
10994 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
10995 case UNSPEC_DTPOFF
:
10996 disp
= XVECEXP (disp
, 0, 0);
10997 return (GET_CODE (disp
) == SYMBOL_REF
10998 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
11004 /* Determine if op is suitable RTX for an address register.
11005 Return naked register if a register or a register subreg is
11006 found, otherwise return NULL_RTX. */
11009 ix86_validate_address_register (rtx op
)
11011 machine_mode mode
= GET_MODE (op
);
11013 /* Only SImode or DImode registers can form the address. */
11014 if (mode
!= SImode
&& mode
!= DImode
)
11019 else if (SUBREG_P (op
))
11021 rtx reg
= SUBREG_REG (op
);
11026 mode
= GET_MODE (reg
);
11028 /* Don't allow SUBREGs that span more than a word. It can
11029 lead to spill failures when the register is one word out
11030 of a two word structure. */
11031 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
11034 /* Allow only SUBREGs of non-eliminable hard registers. */
11035 if (register_no_elim_operand (reg
, mode
))
11039 /* Op is not a register. */
11043 /* Recognizes RTL expressions that are valid memory addresses for an
11044 instruction. The MODE argument is the machine mode for the MEM
11045 expression that wants to use this address.
11047 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
11048 convert common non-canonical forms to canonical form so that they will
11052 ix86_legitimate_address_p (machine_mode
, rtx addr
, bool strict
,
11053 code_helper
= ERROR_MARK
)
11055 struct ix86_address parts
;
11056 rtx base
, index
, disp
;
11057 HOST_WIDE_INT scale
;
11060 if (ix86_decompose_address (addr
, &parts
) == 0)
11061 /* Decomposition failed. */
11065 index
= parts
.index
;
11067 scale
= parts
.scale
;
11070 /* Validate base register. */
11073 rtx reg
= ix86_validate_address_register (base
);
11075 if (reg
== NULL_RTX
)
11078 unsigned int regno
= REGNO (reg
);
11079 if ((strict
&& !REGNO_OK_FOR_BASE_P (regno
))
11080 || (!strict
&& !REGNO_OK_FOR_BASE_NONSTRICT_P (regno
)))
11081 /* Base is not valid. */
11085 /* Validate index register. */
11088 rtx reg
= ix86_validate_address_register (index
);
11090 if (reg
== NULL_RTX
)
11093 unsigned int regno
= REGNO (reg
);
11094 if ((strict
&& !REGNO_OK_FOR_INDEX_P (regno
))
11095 || (!strict
&& !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno
)))
11096 /* Index is not valid. */
11100 /* Index and base should have the same mode. */
11102 && GET_MODE (base
) != GET_MODE (index
))
11105 /* Address override works only on the (%reg) part of %fs:(%reg). */
11106 if (seg
!= ADDR_SPACE_GENERIC
11107 && ((base
&& GET_MODE (base
) != word_mode
)
11108 || (index
&& GET_MODE (index
) != word_mode
)))
11111 /* Validate scale factor. */
11115 /* Scale without index. */
11118 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
11119 /* Scale is not a valid multiplier. */
11123 /* Validate displacement. */
11126 if (ix86_endbr_immediate_operand (disp
, VOIDmode
))
11129 if (GET_CODE (disp
) == CONST
11130 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
11131 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
11132 switch (XINT (XEXP (disp
, 0), 1))
11134 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
11135 when used. While ABI specify also 32bit relocations, we
11136 don't produce them at all and use IP relative instead.
11137 Allow GOT in 32bit mode for both PIC and non-PIC if symbol
11138 should be loaded via GOT. */
11141 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp
, 0), 0, 0)))
11142 goto is_legitimate_pic
;
11144 case UNSPEC_GOTOFF
:
11145 gcc_assert (flag_pic
);
11147 goto is_legitimate_pic
;
11149 /* 64bit address unspec. */
11152 case UNSPEC_GOTPCREL
:
11153 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp
, 0), 0, 0)))
11154 goto is_legitimate_pic
;
11157 gcc_assert (flag_pic
);
11158 goto is_legitimate_pic
;
11160 case UNSPEC_GOTTPOFF
:
11161 case UNSPEC_GOTNTPOFF
:
11162 case UNSPEC_INDNTPOFF
:
11163 case UNSPEC_NTPOFF
:
11164 case UNSPEC_DTPOFF
:
11168 /* Invalid address unspec. */
11172 else if (SYMBOLIC_CONST (disp
)
11175 || (MACHOPIC_INDIRECT
11176 && !machopic_operand_p (disp
))
11182 if (TARGET_64BIT
&& (index
|| base
))
11184 /* foo@dtpoff(%rX) is ok. */
11185 if (GET_CODE (disp
) != CONST
11186 || GET_CODE (XEXP (disp
, 0)) != PLUS
11187 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
11188 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
11189 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
11190 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
11191 /* Non-constant pic memory reference. */
11194 else if ((!TARGET_MACHO
|| flag_pic
)
11195 && ! legitimate_pic_address_disp_p (disp
))
11196 /* Displacement is an invalid pic construct. */
11199 else if (MACHO_DYNAMIC_NO_PIC_P
11200 && !ix86_legitimate_constant_p (Pmode
, disp
))
11201 /* displacment must be referenced via non_lazy_pointer */
11205 /* This code used to verify that a symbolic pic displacement
11206 includes the pic_offset_table_rtx register.
11208 While this is good idea, unfortunately these constructs may
11209 be created by "adds using lea" optimization for incorrect
11218 This code is nonsensical, but results in addressing
11219 GOT table with pic_offset_table_rtx base. We can't
11220 just refuse it easily, since it gets matched by
11221 "addsi3" pattern, that later gets split to lea in the
11222 case output register differs from input. While this
11223 can be handled by separate addsi pattern for this case
11224 that never results in lea, this seems to be easier and
11225 correct fix for crash to disable this test. */
11227 else if (GET_CODE (disp
) != LABEL_REF
11228 && !CONST_INT_P (disp
)
11229 && (GET_CODE (disp
) != CONST
11230 || !ix86_legitimate_constant_p (Pmode
, disp
))
11231 && (GET_CODE (disp
) != SYMBOL_REF
11232 || !ix86_legitimate_constant_p (Pmode
, disp
)))
11233 /* Displacement is not constant. */
11235 else if (TARGET_64BIT
11236 && !x86_64_immediate_operand (disp
, VOIDmode
))
11237 /* Displacement is out of range. */
11239 /* In x32 mode, constant addresses are sign extended to 64bit, so
11240 we have to prevent addresses from 0x80000000 to 0xffffffff. */
11241 else if (TARGET_X32
&& !(index
|| base
)
11242 && CONST_INT_P (disp
)
11243 && val_signbit_known_set_p (SImode
, INTVAL (disp
)))
11247 /* Everything looks valid. */
11251 /* Determine if a given RTX is a valid constant address. */
11254 constant_address_p (rtx x
)
11256 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
11259 /* Return a unique alias set for the GOT. */
11262 ix86_GOT_alias_set (void)
11264 static alias_set_type set
= -1;
11266 set
= new_alias_set ();
11270 /* Return a legitimate reference for ORIG (an address) using the
11271 register REG. If REG is 0, a new pseudo is generated.
11273 There are two types of references that must be handled:
11275 1. Global data references must load the address from the GOT, via
11276 the PIC reg. An insn is emitted to do this load, and the reg is
11279 2. Static data references, constant pool addresses, and code labels
11280 compute the address as an offset from the GOT, whose base is in
11281 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11282 differentiate them from global data objects. The returned
11283 address is the PIC reg + an unspec constant.
11285 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11286 reg also appears in the address. */
11289 legitimize_pic_address (rtx orig
, rtx reg
)
11292 rtx new_rtx
= orig
;
11295 if (TARGET_MACHO
&& !TARGET_64BIT
)
11298 reg
= gen_reg_rtx (Pmode
);
11299 /* Use the generic Mach-O PIC machinery. */
11300 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
11304 if (TARGET_64BIT
&& TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
11306 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
11311 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
11313 else if ((!TARGET_64BIT
11314 || /* TARGET_64BIT && */ ix86_cmodel
!= CM_SMALL_PIC
)
11316 && gotoff_operand (addr
, Pmode
))
11318 /* This symbol may be referenced via a displacement
11319 from the PIC base address (@GOTOFF). */
11320 if (GET_CODE (addr
) == CONST
)
11321 addr
= XEXP (addr
, 0);
11323 if (GET_CODE (addr
) == PLUS
)
11325 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
11327 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
11330 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
11332 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
11335 new_rtx
= copy_to_suggested_reg (new_rtx
, reg
, Pmode
);
11339 gcc_assert (REG_P (reg
));
11340 new_rtx
= expand_simple_binop (Pmode
, PLUS
, pic_offset_table_rtx
,
11341 new_rtx
, reg
, 1, OPTAB_DIRECT
);
11344 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
11346 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
11347 /* We can't always use @GOTOFF for text labels
11348 on VxWorks, see gotoff_operand. */
11349 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
11351 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
11355 /* For x64 PE-COFF there is no GOT table,
11356 so we use address directly. */
11357 if (TARGET_64BIT
&& TARGET_PECOFF
)
11359 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
11360 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
11362 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
11364 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
),
11366 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
11367 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
11368 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
11372 /* This symbol must be referenced via a load
11373 from the Global Offset Table (@GOT). */
11374 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
11375 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
11378 new_rtx
= copy_to_suggested_reg (new_rtx
, reg
, Pmode
);
11382 gcc_assert (REG_P (reg
));
11383 new_rtx
= expand_simple_binop (Pmode
, PLUS
, pic_offset_table_rtx
,
11384 new_rtx
, reg
, 1, OPTAB_DIRECT
);
11387 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
11389 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
11390 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
11393 new_rtx
= copy_to_suggested_reg (new_rtx
, reg
, Pmode
);
11397 if (CONST_INT_P (addr
)
11398 && !x86_64_immediate_operand (addr
, VOIDmode
))
11399 new_rtx
= copy_to_suggested_reg (addr
, reg
, Pmode
);
11400 else if (GET_CODE (addr
) == CONST
)
11402 addr
= XEXP (addr
, 0);
11404 /* We must match stuff we generate before. Assume the only
11405 unspecs that can get here are ours. Not that we could do
11406 anything with them anyway.... */
11407 if (GET_CODE (addr
) == UNSPEC
11408 || (GET_CODE (addr
) == PLUS
11409 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
11411 gcc_assert (GET_CODE (addr
) == PLUS
);
11414 if (GET_CODE (addr
) == PLUS
)
11416 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
11418 /* Check first to see if this is a constant
11419 offset from a @GOTOFF symbol reference. */
11421 && gotoff_operand (op0
, Pmode
)
11422 && CONST_INT_P (op1
))
11426 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
11428 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
11429 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
11433 gcc_assert (REG_P (reg
));
11434 new_rtx
= expand_simple_binop (Pmode
, PLUS
,
11435 pic_offset_table_rtx
,
11441 = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
11445 if (INTVAL (op1
) < -16*1024*1024
11446 || INTVAL (op1
) >= 16*1024*1024)
11448 if (!x86_64_immediate_operand (op1
, Pmode
))
11449 op1
= force_reg (Pmode
, op1
);
11452 = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
11458 rtx base
= legitimize_pic_address (op0
, reg
);
11459 machine_mode mode
= GET_MODE (base
);
11461 = legitimize_pic_address (op1
, base
== reg
? NULL_RTX
: reg
);
11463 if (CONST_INT_P (new_rtx
))
11465 if (INTVAL (new_rtx
) < -16*1024*1024
11466 || INTVAL (new_rtx
) >= 16*1024*1024)
11468 if (!x86_64_immediate_operand (new_rtx
, mode
))
11469 new_rtx
= force_reg (mode
, new_rtx
);
11472 = gen_rtx_PLUS (mode
, force_reg (mode
, base
), new_rtx
);
11475 new_rtx
= plus_constant (mode
, base
, INTVAL (new_rtx
));
11479 /* For %rip addressing, we have to use
11480 just disp32, not base nor index. */
11482 && (GET_CODE (base
) == SYMBOL_REF
11483 || GET_CODE (base
) == LABEL_REF
))
11484 base
= force_reg (mode
, base
);
11485 if (GET_CODE (new_rtx
) == PLUS
11486 && CONSTANT_P (XEXP (new_rtx
, 1)))
11488 base
= gen_rtx_PLUS (mode
, base
, XEXP (new_rtx
, 0));
11489 new_rtx
= XEXP (new_rtx
, 1);
11491 new_rtx
= gen_rtx_PLUS (mode
, base
, new_rtx
);
11499 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11502 get_thread_pointer (machine_mode tp_mode
, bool to_reg
)
11504 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
11506 if (GET_MODE (tp
) != tp_mode
)
11508 gcc_assert (GET_MODE (tp
) == SImode
);
11509 gcc_assert (tp_mode
== DImode
);
11511 tp
= gen_rtx_ZERO_EXTEND (tp_mode
, tp
);
11515 tp
= copy_to_mode_reg (tp_mode
, tp
);
11520 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11522 static GTY(()) rtx ix86_tls_symbol
;
11525 ix86_tls_get_addr (void)
11527 if (!ix86_tls_symbol
)
11530 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
11531 ? "___tls_get_addr" : "__tls_get_addr");
11533 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
11536 if (ix86_cmodel
== CM_LARGE_PIC
&& !TARGET_PECOFF
)
11538 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, ix86_tls_symbol
),
11540 return gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
,
11541 gen_rtx_CONST (Pmode
, unspec
));
11544 return ix86_tls_symbol
;
11547 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
11549 static GTY(()) rtx ix86_tls_module_base_symbol
;
11552 ix86_tls_module_base (void)
11554 if (!ix86_tls_module_base_symbol
)
11556 ix86_tls_module_base_symbol
11557 = gen_rtx_SYMBOL_REF (ptr_mode
, "_TLS_MODULE_BASE_");
11559 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
11560 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
11563 return ix86_tls_module_base_symbol
;
11566 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11567 false if we expect this to be used for a memory address and true if
11568 we expect to load the address into a register. */
11571 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
11573 rtx dest
, base
, off
;
11574 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
11575 machine_mode tp_mode
= Pmode
;
11578 /* Fall back to global dynamic model if tool chain cannot support local
11580 if (TARGET_SUN_TLS
&& !TARGET_64BIT
11581 && !HAVE_AS_IX86_TLSLDMPLT
&& !HAVE_AS_IX86_TLSLDM
11582 && model
== TLS_MODEL_LOCAL_DYNAMIC
)
11583 model
= TLS_MODEL_GLOBAL_DYNAMIC
;
11587 case TLS_MODEL_GLOBAL_DYNAMIC
:
11590 if (flag_pic
&& !TARGET_PECOFF
)
11591 pic
= pic_offset_table_rtx
;
11594 pic
= gen_reg_rtx (Pmode
);
11595 emit_insn (gen_set_got (pic
));
11599 if (TARGET_GNU2_TLS
)
11601 dest
= gen_reg_rtx (ptr_mode
);
11603 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode
, dest
, x
));
11605 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
11607 tp
= get_thread_pointer (ptr_mode
, true);
11608 dest
= gen_rtx_PLUS (ptr_mode
, tp
, dest
);
11609 if (GET_MODE (dest
) != Pmode
)
11610 dest
= gen_rtx_ZERO_EXTEND (Pmode
, dest
);
11611 dest
= force_reg (Pmode
, dest
);
11613 if (GET_MODE (x
) != Pmode
)
11614 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
11616 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
11620 rtx caddr
= ix86_tls_get_addr ();
11622 dest
= gen_reg_rtx (Pmode
);
11625 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
11630 (gen_tls_global_dynamic_64 (Pmode
, rax
, x
, caddr
));
11631 insns
= get_insns ();
11634 if (GET_MODE (x
) != Pmode
)
11635 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
11637 RTL_CONST_CALL_P (insns
) = 1;
11638 emit_libcall_block (insns
, dest
, rax
, x
);
11641 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
11645 case TLS_MODEL_LOCAL_DYNAMIC
:
11649 pic
= pic_offset_table_rtx
;
11652 pic
= gen_reg_rtx (Pmode
);
11653 emit_insn (gen_set_got (pic
));
11657 if (TARGET_GNU2_TLS
)
11659 rtx tmp
= ix86_tls_module_base ();
11661 base
= gen_reg_rtx (ptr_mode
);
11663 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode
, base
, tmp
));
11665 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
11667 tp
= get_thread_pointer (ptr_mode
, true);
11668 if (GET_MODE (base
) != Pmode
)
11669 base
= gen_rtx_ZERO_EXTEND (Pmode
, base
);
11670 base
= force_reg (Pmode
, base
);
11674 rtx caddr
= ix86_tls_get_addr ();
11676 base
= gen_reg_rtx (Pmode
);
11679 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
11685 (gen_tls_local_dynamic_base_64 (Pmode
, rax
, caddr
));
11686 insns
= get_insns ();
11689 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
11690 share the LD_BASE result with other LD model accesses. */
11691 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
11692 UNSPEC_TLS_LD_BASE
);
11694 RTL_CONST_CALL_P (insns
) = 1;
11695 emit_libcall_block (insns
, base
, rax
, eqv
);
11698 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
11701 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
11702 off
= gen_rtx_CONST (Pmode
, off
);
11704 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
11706 if (TARGET_GNU2_TLS
)
11708 if (GET_MODE (tp
) != Pmode
)
11710 dest
= lowpart_subreg (ptr_mode
, dest
, Pmode
);
11711 dest
= gen_rtx_PLUS (ptr_mode
, tp
, dest
);
11712 dest
= gen_rtx_ZERO_EXTEND (Pmode
, dest
);
11715 dest
= gen_rtx_PLUS (Pmode
, tp
, dest
);
11716 dest
= force_reg (Pmode
, dest
);
11718 if (GET_MODE (x
) != Pmode
)
11719 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
11721 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
11725 case TLS_MODEL_INITIAL_EXEC
:
11728 if (TARGET_SUN_TLS
&& !TARGET_X32
)
11730 /* The Sun linker took the AMD64 TLS spec literally
11731 and can only handle %rax as destination of the
11732 initial executable code sequence. */
11734 dest
= gen_reg_rtx (DImode
);
11735 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
11739 /* Generate DImode references to avoid %fs:(%reg32)
11740 problems and linker IE->LE relaxation bug. */
11743 type
= UNSPEC_GOTNTPOFF
;
11747 pic
= pic_offset_table_rtx
;
11748 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
11750 else if (!TARGET_ANY_GNU_TLS
)
11752 pic
= gen_reg_rtx (Pmode
);
11753 emit_insn (gen_set_got (pic
));
11754 type
= UNSPEC_GOTTPOFF
;
11759 type
= UNSPEC_INDNTPOFF
;
11762 off
= gen_rtx_UNSPEC (tp_mode
, gen_rtvec (1, x
), type
);
11763 off
= gen_rtx_CONST (tp_mode
, off
);
11765 off
= gen_rtx_PLUS (tp_mode
, pic
, off
);
11766 off
= gen_const_mem (tp_mode
, off
);
11767 set_mem_alias_set (off
, ix86_GOT_alias_set ());
11769 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
11771 base
= get_thread_pointer (tp_mode
,
11772 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
11773 off
= force_reg (tp_mode
, off
);
11774 dest
= gen_rtx_PLUS (tp_mode
, base
, off
);
11775 if (tp_mode
!= Pmode
)
11776 dest
= convert_to_mode (Pmode
, dest
, 1);
11780 base
= get_thread_pointer (Pmode
, true);
11781 dest
= gen_reg_rtx (Pmode
);
11782 emit_insn (gen_sub3_insn (dest
, base
, off
));
11786 case TLS_MODEL_LOCAL_EXEC
:
11787 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
11788 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
11789 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
11790 off
= gen_rtx_CONST (Pmode
, off
);
11792 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
11794 base
= get_thread_pointer (Pmode
,
11795 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
11796 return gen_rtx_PLUS (Pmode
, base
, off
);
11800 base
= get_thread_pointer (Pmode
, true);
11801 dest
= gen_reg_rtx (Pmode
);
11802 emit_insn (gen_sub3_insn (dest
, base
, off
));
11807 gcc_unreachable ();
11813 /* Return true if the TLS address requires insn using integer registers.
11814 It's used to prevent KMOV/VMOV in TLS code sequences which require integer
11815 MOV instructions, refer to PR103275. */
11817 ix86_gpr_tls_address_pattern_p (rtx mem
)
11819 gcc_assert (MEM_P (mem
));
11821 rtx addr
= XEXP (mem
, 0);
11822 subrtx_var_iterator::array_type array
;
11823 FOR_EACH_SUBRTX_VAR (iter
, array
, addr
, ALL
)
11826 if (GET_CODE (op
) == UNSPEC
)
11827 switch (XINT (op
, 1))
11829 case UNSPEC_GOTNTPOFF
:
11843 /* Return true if OP refers to a TLS address. */
11845 ix86_tls_address_pattern_p (rtx op
)
11847 subrtx_var_iterator::array_type array
;
11848 FOR_EACH_SUBRTX_VAR (iter
, array
, op
, ALL
)
11853 rtx
*x
= &XEXP (op
, 0);
11854 while (GET_CODE (*x
) == PLUS
)
11857 for (i
= 0; i
< 2; i
++)
11859 rtx u
= XEXP (*x
, i
);
11860 if (GET_CODE (u
) == ZERO_EXTEND
)
11862 if (GET_CODE (u
) == UNSPEC
11863 && XINT (u
, 1) == UNSPEC_TP
)
11869 iter
.skip_subrtxes ();
11876 /* Rewrite *LOC so that it refers to a default TLS address space. */
11878 ix86_rewrite_tls_address_1 (rtx
*loc
)
11880 subrtx_ptr_iterator::array_type array
;
11881 FOR_EACH_SUBRTX_PTR (iter
, array
, loc
, ALL
)
11886 rtx addr
= XEXP (*loc
, 0);
11888 while (GET_CODE (*x
) == PLUS
)
11891 for (i
= 0; i
< 2; i
++)
11893 rtx u
= XEXP (*x
, i
);
11894 if (GET_CODE (u
) == ZERO_EXTEND
)
11896 if (GET_CODE (u
) == UNSPEC
11897 && XINT (u
, 1) == UNSPEC_TP
)
11899 addr_space_t as
= DEFAULT_TLS_SEG_REG
;
11901 *x
= XEXP (*x
, 1 - i
);
11903 *loc
= replace_equiv_address_nv (*loc
, addr
, true);
11904 set_mem_addr_space (*loc
, as
);
11911 iter
.skip_subrtxes ();
11916 /* Rewrite instruction pattern involvning TLS address
11917 so that it refers to a default TLS address space. */
11919 ix86_rewrite_tls_address (rtx pattern
)
11921 pattern
= copy_insn (pattern
);
11922 ix86_rewrite_tls_address_1 (&pattern
);
11926 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11927 to symbol DECL if BEIMPORT is true. Otherwise create or return the
11928 unique refptr-DECL symbol corresponding to symbol DECL. */
11930 struct dllimport_hasher
: ggc_cache_ptr_hash
<tree_map
>
11932 static inline hashval_t
hash (tree_map
*m
) { return m
->hash
; }
11934 equal (tree_map
*a
, tree_map
*b
)
11936 return a
->base
.from
== b
->base
.from
;
11940 keep_cache_entry (tree_map
*&m
)
11942 return ggc_marked_p (m
->base
.from
);
11946 static GTY((cache
)) hash_table
<dllimport_hasher
> *dllimport_map
;
11949 get_dllimport_decl (tree decl
, bool beimport
)
11951 struct tree_map
*h
, in
;
11953 const char *prefix
;
11954 size_t namelen
, prefixlen
;
11959 if (!dllimport_map
)
11960 dllimport_map
= hash_table
<dllimport_hasher
>::create_ggc (512);
11962 in
.hash
= htab_hash_pointer (decl
);
11963 in
.base
.from
= decl
;
11964 tree_map
**loc
= dllimport_map
->find_slot_with_hash (&in
, in
.hash
, INSERT
);
11969 *loc
= h
= ggc_alloc
<tree_map
> ();
11971 h
->base
.from
= decl
;
11972 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
11973 VAR_DECL
, NULL
, ptr_type_node
);
11974 DECL_ARTIFICIAL (to
) = 1;
11975 DECL_IGNORED_P (to
) = 1;
11976 DECL_EXTERNAL (to
) = 1;
11977 TREE_READONLY (to
) = 1;
11979 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
11980 name
= targetm
.strip_name_encoding (name
);
11982 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
11983 ? "*__imp_" : "*__imp__";
11985 prefix
= user_label_prefix
[0] == 0 ? "*.refptr." : "*refptr.";
11986 namelen
= strlen (name
);
11987 prefixlen
= strlen (prefix
);
11988 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
11989 memcpy (imp_name
, prefix
, prefixlen
);
11990 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
11992 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
11993 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
11994 SET_SYMBOL_REF_DECL (rtl
, to
);
11995 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
| SYMBOL_FLAG_STUBVAR
;
11998 SYMBOL_REF_FLAGS (rtl
) |= SYMBOL_FLAG_EXTERNAL
;
11999 #ifdef SUB_TARGET_RECORD_STUB
12000 SUB_TARGET_RECORD_STUB (name
);
12004 rtl
= gen_const_mem (Pmode
, rtl
);
12005 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
12007 SET_DECL_RTL (to
, rtl
);
12008 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
12013 /* Expand SYMBOL into its corresponding far-address symbol.
12014 WANT_REG is true if we require the result be a register. */
12017 legitimize_pe_coff_extern_decl (rtx symbol
, bool want_reg
)
12022 gcc_assert (SYMBOL_REF_DECL (symbol
));
12023 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), false);
12025 x
= DECL_RTL (imp_decl
);
12027 x
= force_reg (Pmode
, x
);
12031 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12032 true if we require the result be a register. */
12035 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
12040 gcc_assert (SYMBOL_REF_DECL (symbol
));
12041 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), true);
12043 x
= DECL_RTL (imp_decl
);
12045 x
= force_reg (Pmode
, x
);
12049 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
12050 is true if we require the result be a register. */
12053 legitimize_pe_coff_symbol (rtx addr
, bool inreg
)
12055 if (!TARGET_PECOFF
)
12058 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12060 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
12061 return legitimize_dllimport_symbol (addr
, inreg
);
12062 if (GET_CODE (addr
) == CONST
12063 && GET_CODE (XEXP (addr
, 0)) == PLUS
12064 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
12065 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
12067 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), inreg
);
12068 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
12072 if (ix86_cmodel
!= CM_LARGE_PIC
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
12074 if (GET_CODE (addr
) == SYMBOL_REF
12075 && !is_imported_p (addr
)
12076 && SYMBOL_REF_EXTERNAL_P (addr
)
12077 && SYMBOL_REF_DECL (addr
))
12078 return legitimize_pe_coff_extern_decl (addr
, inreg
);
12080 if (GET_CODE (addr
) == CONST
12081 && GET_CODE (XEXP (addr
, 0)) == PLUS
12082 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
12083 && !is_imported_p (XEXP (XEXP (addr
, 0), 0))
12084 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr
, 0), 0))
12085 && SYMBOL_REF_DECL (XEXP (XEXP (addr
, 0), 0)))
12087 rtx t
= legitimize_pe_coff_extern_decl (XEXP (XEXP (addr
, 0), 0), inreg
);
12088 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
12093 /* Try machine-dependent ways of modifying an illegitimate address
12094 to be legitimate. If we find one, return the new, valid address.
12095 This macro is used in only one place: `memory_address' in explow.cc.
12097 OLDX is the address as it was before break_out_memory_refs was called.
12098 In some cases it is useful to look at this to decide what needs to be done.
12100 It is always safe for this macro to do nothing. It exists to recognize
12101 opportunities to optimize the output.
12103 For the 80386, we handle X+REG by loading X into a register R and
12104 using R+REG. R will go in a general reg and indexing will be used.
12105 However, if REG is a broken-out memory address or multiplication,
12106 nothing needs to be done because REG can certainly go in a general reg.
12108 When -fpic is used, special handling is needed for symbolic references.
12109 See comments by legitimize_pic_address in i386.cc for details. */
12112 ix86_legitimize_address (rtx x
, rtx
, machine_mode mode
)
12114 bool changed
= false;
12117 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
12119 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
12120 if (GET_CODE (x
) == CONST
12121 && GET_CODE (XEXP (x
, 0)) == PLUS
12122 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
12123 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
12125 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
12126 (enum tls_model
) log
, false);
12127 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
12130 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12132 rtx tmp
= legitimize_pe_coff_symbol (x
, true);
12137 if (flag_pic
&& SYMBOLIC_CONST (x
))
12138 return legitimize_pic_address (x
, 0);
12141 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
12142 return machopic_indirect_data_reference (x
, 0);
12145 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
12146 if (GET_CODE (x
) == ASHIFT
12147 && CONST_INT_P (XEXP (x
, 1))
12148 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
12151 log
= INTVAL (XEXP (x
, 1));
12152 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
12153 GEN_INT (1 << log
));
12156 if (GET_CODE (x
) == PLUS
)
12158 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12160 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
12161 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
12162 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
12165 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
12166 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
12167 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
12168 GEN_INT (1 << log
));
12171 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
12172 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
12173 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
12176 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
12177 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
12178 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
12179 GEN_INT (1 << log
));
12182 /* Put multiply first if it isn't already. */
12183 if (GET_CODE (XEXP (x
, 1)) == MULT
)
12185 std::swap (XEXP (x
, 0), XEXP (x
, 1));
12189 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12190 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12191 created by virtual register instantiation, register elimination, and
12192 similar optimizations. */
12193 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
12196 x
= gen_rtx_PLUS (Pmode
,
12197 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
12198 XEXP (XEXP (x
, 1), 0)),
12199 XEXP (XEXP (x
, 1), 1));
12203 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12204 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12205 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
12206 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
12207 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
12208 && CONSTANT_P (XEXP (x
, 1)))
12211 rtx other
= NULL_RTX
;
12213 if (CONST_INT_P (XEXP (x
, 1)))
12215 constant
= XEXP (x
, 1);
12216 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
12218 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
12220 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
12221 other
= XEXP (x
, 1);
12229 x
= gen_rtx_PLUS (Pmode
,
12230 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
12231 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
12232 plus_constant (Pmode
, other
,
12233 INTVAL (constant
)));
12237 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
12240 if (GET_CODE (XEXP (x
, 0)) == MULT
)
12243 XEXP (x
, 0) = copy_addr_to_reg (XEXP (x
, 0));
12246 if (GET_CODE (XEXP (x
, 1)) == MULT
)
12249 XEXP (x
, 1) = copy_addr_to_reg (XEXP (x
, 1));
12253 && REG_P (XEXP (x
, 1))
12254 && REG_P (XEXP (x
, 0)))
12257 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
12260 x
= legitimize_pic_address (x
, 0);
12263 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
12266 if (REG_P (XEXP (x
, 0)))
12268 rtx temp
= gen_reg_rtx (Pmode
);
12269 rtx val
= force_operand (XEXP (x
, 1), temp
);
12272 val
= convert_to_mode (Pmode
, val
, 1);
12273 emit_move_insn (temp
, val
);
12276 XEXP (x
, 1) = temp
;
12280 else if (REG_P (XEXP (x
, 1)))
12282 rtx temp
= gen_reg_rtx (Pmode
);
12283 rtx val
= force_operand (XEXP (x
, 0), temp
);
12286 val
= convert_to_mode (Pmode
, val
, 1);
12287 emit_move_insn (temp
, val
);
12290 XEXP (x
, 0) = temp
;
12298 /* Print an integer constant expression in assembler syntax. Addition
12299 and subtraction are the only arithmetic that may appear in these
12300 expressions. FILE is the stdio stream to write to, X is the rtx, and
12301 CODE is the operand print code from the output string. */
12304 output_pic_addr_const (FILE *file
, rtx x
, int code
)
12308 switch (GET_CODE (x
))
12311 gcc_assert (flag_pic
);
12316 if (TARGET_64BIT
|| ! TARGET_MACHO_SYMBOL_STUBS
)
12317 output_addr_const (file
, x
);
12320 const char *name
= XSTR (x
, 0);
12322 /* Mark the decl as referenced so that cgraph will
12323 output the function. */
12324 if (SYMBOL_REF_DECL (x
))
12325 mark_decl_referenced (SYMBOL_REF_DECL (x
));
12328 if (MACHOPIC_INDIRECT
12329 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
12330 name
= machopic_indirection_name (x
, /*stub_p=*/true);
12332 assemble_name (file
, name
);
12334 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& TARGET_PECOFF
)
12335 && code
== 'P' && ix86_call_use_plt_p (x
))
12336 fputs ("@PLT", file
);
12343 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
12344 assemble_name (asm_out_file
, buf
);
12348 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
12352 /* This used to output parentheses around the expression,
12353 but that does not work on the 386 (either ATT or BSD assembler). */
12354 output_pic_addr_const (file
, XEXP (x
, 0), code
);
12358 /* We can't handle floating point constants;
12359 TARGET_PRINT_OPERAND must handle them. */
12360 output_operand_lossage ("floating constant misused");
12364 /* Some assemblers need integer constants to appear first. */
12365 if (CONST_INT_P (XEXP (x
, 0)))
12367 output_pic_addr_const (file
, XEXP (x
, 0), code
);
12369 output_pic_addr_const (file
, XEXP (x
, 1), code
);
12373 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
12374 output_pic_addr_const (file
, XEXP (x
, 1), code
);
12376 output_pic_addr_const (file
, XEXP (x
, 0), code
);
12382 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
12383 output_pic_addr_const (file
, XEXP (x
, 0), code
);
12385 output_pic_addr_const (file
, XEXP (x
, 1), code
);
12387 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
12391 gcc_assert (XVECLEN (x
, 0) == 1);
12392 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
12393 switch (XINT (x
, 1))
12396 fputs ("@GOT", file
);
12398 case UNSPEC_GOTOFF
:
12399 fputs ("@GOTOFF", file
);
12401 case UNSPEC_PLTOFF
:
12402 fputs ("@PLTOFF", file
);
12405 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
12406 "(%rip)" : "[rip]", file
);
12408 case UNSPEC_GOTPCREL
:
12409 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
12410 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
12412 case UNSPEC_GOTTPOFF
:
12413 /* FIXME: This might be @TPOFF in Sun ld too. */
12414 fputs ("@gottpoff", file
);
12417 fputs ("@tpoff", file
);
12419 case UNSPEC_NTPOFF
:
12421 fputs ("@tpoff", file
);
12423 fputs ("@ntpoff", file
);
12425 case UNSPEC_DTPOFF
:
12426 fputs ("@dtpoff", file
);
12428 case UNSPEC_GOTNTPOFF
:
12430 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
12431 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
12433 fputs ("@gotntpoff", file
);
12435 case UNSPEC_INDNTPOFF
:
12436 fputs ("@indntpoff", file
);
12439 case UNSPEC_MACHOPIC_OFFSET
:
12441 machopic_output_function_base_name (file
);
12445 output_operand_lossage ("invalid UNSPEC as operand");
12451 output_operand_lossage ("invalid expression as operand");
12455 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12456 We need to emit DTP-relative relocations. */
12458 static void ATTRIBUTE_UNUSED
12459 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
12461 fputs (ASM_LONG
, file
);
12462 output_addr_const (file
, x
);
12463 fputs ("@dtpoff", file
);
12469 fputs (", 0", file
);
12472 gcc_unreachable ();
12476 /* Return true if X is a representation of the PIC register. This copes
12477 with calls from ix86_find_base_term, where the register might have
12478 been replaced by a cselib value. */
12481 ix86_pic_register_p (rtx x
)
12483 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
12484 return (pic_offset_table_rtx
12485 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
12486 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SET_GOT
)
12488 else if (!REG_P (x
))
12490 else if (pic_offset_table_rtx
)
12492 if (REGNO (x
) == REGNO (pic_offset_table_rtx
))
12494 if (HARD_REGISTER_P (x
)
12495 && !HARD_REGISTER_P (pic_offset_table_rtx
)
12496 && ORIGINAL_REGNO (x
) == REGNO (pic_offset_table_rtx
))
12501 return REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
12504 /* Helper function for ix86_delegitimize_address.
12505 Attempt to delegitimize TLS local-exec accesses. */
12508 ix86_delegitimize_tls_address (rtx orig_x
)
12510 rtx x
= orig_x
, unspec
;
12511 struct ix86_address addr
;
12513 if (!TARGET_TLS_DIRECT_SEG_REFS
)
12517 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
12519 if (ix86_decompose_address (x
, &addr
) == 0
12520 || addr
.seg
!= DEFAULT_TLS_SEG_REG
12521 || addr
.disp
== NULL_RTX
12522 || GET_CODE (addr
.disp
) != CONST
)
12524 unspec
= XEXP (addr
.disp
, 0);
12525 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
12526 unspec
= XEXP (unspec
, 0);
12527 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
12529 x
= XVECEXP (unspec
, 0, 0);
12530 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
12531 if (unspec
!= XEXP (addr
.disp
, 0))
12532 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
12535 rtx idx
= addr
.index
;
12536 if (addr
.scale
!= 1)
12537 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
12538 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
12541 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
12542 if (MEM_P (orig_x
))
12543 x
= replace_equiv_address_nv (orig_x
, x
);
12547 /* In the name of slightly smaller debug output, and to cater to
12548 general assembler lossage, recognize PIC+GOTOFF and turn it back
12549 into a direct symbol reference.
12551 On Darwin, this is necessary to avoid a crash, because Darwin
12552 has a different PIC label for each routine but the DWARF debugging
12553 information is not associated with any particular routine, so it's
12554 necessary to remove references to the PIC label from RTL stored by
12555 the DWARF output code.
12557 This helper is used in the normal ix86_delegitimize_address
12558 entrypoint (e.g. used in the target delegitimization hook) and
12559 in ix86_find_base_term. As compile time memory optimization, we
12560 avoid allocating rtxes that will not change anything on the outcome
12561 of the callers (find_base_value and find_base_term). */
12564 ix86_delegitimize_address_1 (rtx x
, bool base_term_p
)
12566 rtx orig_x
= delegitimize_mem_from_attrs (x
);
12567 /* addend is NULL or some rtx if x is something+GOTOFF where
12568 something doesn't include the PIC register. */
12569 rtx addend
= NULL_RTX
;
12570 /* reg_addend is NULL or a multiple of some register. */
12571 rtx reg_addend
= NULL_RTX
;
12572 /* const_addend is NULL or a const_int. */
12573 rtx const_addend
= NULL_RTX
;
12574 /* This is the result, or NULL. */
12575 rtx result
= NULL_RTX
;
12584 if (GET_CODE (x
) == CONST
12585 && GET_CODE (XEXP (x
, 0)) == PLUS
12586 && GET_MODE (XEXP (x
, 0)) == Pmode
12587 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
12588 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
12589 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
12591 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
12592 base. A CONST can't be arg_pointer_rtx based. */
12593 if (base_term_p
&& MEM_P (orig_x
))
12595 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
12596 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
12597 if (MEM_P (orig_x
))
12598 x
= replace_equiv_address_nv (orig_x
, x
);
12602 if (GET_CODE (x
) == CONST
12603 && GET_CODE (XEXP (x
, 0)) == UNSPEC
12604 && (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTPCREL
12605 || XINT (XEXP (x
, 0), 1) == UNSPEC_PCREL
)
12606 && (MEM_P (orig_x
) || XINT (XEXP (x
, 0), 1) == UNSPEC_PCREL
))
12608 x
= XVECEXP (XEXP (x
, 0), 0, 0);
12609 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
12611 x
= lowpart_subreg (GET_MODE (orig_x
), x
, GET_MODE (x
));
12618 if (ix86_cmodel
!= CM_MEDIUM_PIC
&& ix86_cmodel
!= CM_LARGE_PIC
)
12619 return ix86_delegitimize_tls_address (orig_x
);
12621 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
12622 and -mcmodel=medium -fpic. */
12625 if (GET_CODE (x
) != PLUS
12626 || GET_CODE (XEXP (x
, 1)) != CONST
)
12627 return ix86_delegitimize_tls_address (orig_x
);
12629 if (ix86_pic_register_p (XEXP (x
, 0)))
12630 /* %ebx + GOT/GOTOFF */
12632 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
12634 /* %ebx + %reg * scale + GOT/GOTOFF */
12635 reg_addend
= XEXP (x
, 0);
12636 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
12637 reg_addend
= XEXP (reg_addend
, 1);
12638 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
12639 reg_addend
= XEXP (reg_addend
, 0);
12642 reg_addend
= NULL_RTX
;
12643 addend
= XEXP (x
, 0);
12647 addend
= XEXP (x
, 0);
12649 x
= XEXP (XEXP (x
, 1), 0);
12650 if (GET_CODE (x
) == PLUS
12651 && CONST_INT_P (XEXP (x
, 1)))
12653 const_addend
= XEXP (x
, 1);
12657 if (GET_CODE (x
) == UNSPEC
12658 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
12659 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))
12660 || (XINT (x
, 1) == UNSPEC_PLTOFF
&& ix86_cmodel
== CM_LARGE_PIC
12661 && !MEM_P (orig_x
) && !addend
)))
12662 result
= XVECEXP (x
, 0, 0);
12664 if (!TARGET_64BIT
&& TARGET_MACHO
&& darwin_local_data_pic (x
)
12665 && !MEM_P (orig_x
))
12666 result
= XVECEXP (x
, 0, 0);
12669 return ix86_delegitimize_tls_address (orig_x
);
12671 /* For (PLUS something CONST_INT) both find_base_{value,term} just
12672 recurse on the first operand. */
12673 if (const_addend
&& !base_term_p
)
12674 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
12676 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
12679 /* If the rest of original X doesn't involve the PIC register, add
12680 addend and subtract pic_offset_table_rtx. This can happen e.g.
12682 leal (%ebx, %ecx, 4), %ecx
12684 movl foo@GOTOFF(%ecx), %edx
12685 in which case we return (%ecx - %ebx) + foo
12686 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
12687 and reload has completed. Don't do the latter for debug,
12688 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
12689 if (pic_offset_table_rtx
12690 && (!reload_completed
|| !ix86_use_pseudo_pic_reg ()))
12691 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
12692 pic_offset_table_rtx
),
12694 else if (base_term_p
12695 && pic_offset_table_rtx
12697 && !TARGET_VXWORKS_RTP
)
12699 rtx tmp
= gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
12700 tmp
= gen_rtx_MINUS (Pmode
, copy_rtx (addend
), tmp
);
12701 result
= gen_rtx_PLUS (Pmode
, tmp
, result
);
12706 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
12708 result
= lowpart_subreg (GET_MODE (orig_x
), result
, Pmode
);
12709 if (result
== NULL_RTX
)
12715 /* The normal instantiation of the above template. */
12718 ix86_delegitimize_address (rtx x
)
12720 return ix86_delegitimize_address_1 (x
, false);
12723 /* If X is a machine specific address (i.e. a symbol or label being
12724 referenced as a displacement from the GOT implemented using an
12725 UNSPEC), then return the base term. Otherwise return X. */
12728 ix86_find_base_term (rtx x
)
12734 if (GET_CODE (x
) != CONST
)
12736 term
= XEXP (x
, 0);
12737 if (GET_CODE (term
) == PLUS
12738 && CONST_INT_P (XEXP (term
, 1)))
12739 term
= XEXP (term
, 0);
12740 if (GET_CODE (term
) != UNSPEC
12741 || (XINT (term
, 1) != UNSPEC_GOTPCREL
12742 && XINT (term
, 1) != UNSPEC_PCREL
))
12745 return XVECEXP (term
, 0, 0);
12748 return ix86_delegitimize_address_1 (x
, true);
12751 /* Return true if X shouldn't be emitted into the debug info.
12752 Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
12753 symbol easily into the .debug_info section, so we need not to
12754 delegitimize, but instead assemble as @gotoff.
12755 Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
12756 assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */
12759 ix86_const_not_ok_for_debug_p (rtx x
)
12761 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) != UNSPEC_GOTOFF
)
12764 if (SYMBOL_REF_P (x
) && strcmp (XSTR (x
, 0), GOT_SYMBOL_NAME
) == 0)
12771 put_condition_code (enum rtx_code code
, machine_mode mode
, bool reverse
,
12772 bool fp
, FILE *file
)
12774 const char *suffix
;
12776 if (mode
== CCFPmode
)
12778 code
= ix86_fp_compare_code_to_integer (code
);
12782 code
= reverse_condition (code
);
12787 gcc_assert (mode
!= CCGZmode
);
12811 gcc_assert (mode
!= CCGZmode
);
12835 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
12839 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12840 Those same assemblers have the same but opposite lossage on cmov. */
12841 if (mode
== CCmode
)
12842 suffix
= fp
? "nbe" : "a";
12844 gcc_unreachable ();
12861 gcc_unreachable ();
12865 if (mode
== CCmode
|| mode
== CCGZmode
)
12867 else if (mode
== CCCmode
)
12868 suffix
= fp
? "b" : "c";
12870 gcc_unreachable ();
12887 gcc_unreachable ();
12891 if (mode
== CCmode
|| mode
== CCGZmode
)
12893 else if (mode
== CCCmode
)
12894 suffix
= fp
? "nb" : "nc";
12896 gcc_unreachable ();
12899 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
12903 if (mode
== CCmode
)
12906 gcc_unreachable ();
12909 suffix
= fp
? "u" : "p";
12912 suffix
= fp
? "nu" : "np";
12915 gcc_unreachable ();
12917 fputs (suffix
, file
);
12920 /* Print the name of register X to FILE based on its machine mode and number.
12921 If CODE is 'w', pretend the mode is HImode.
12922 If CODE is 'b', pretend the mode is QImode.
12923 If CODE is 'k', pretend the mode is SImode.
12924 If CODE is 'q', pretend the mode is DImode.
12925 If CODE is 'x', pretend the mode is V4SFmode.
12926 If CODE is 't', pretend the mode is V8SFmode.
12927 If CODE is 'g', pretend the mode is V16SFmode.
12928 If CODE is 'h', pretend the reg is the 'high' byte register.
12929 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12930 If CODE is 'd', duplicate the operand for AVX instruction.
12931 If CODE is 'V', print naked full integer register name without %.
12935 print_reg (rtx x
, int code
, FILE *file
)
12939 unsigned int regno
;
12942 if (ASSEMBLER_DIALECT
== ASM_ATT
&& code
!= 'V')
12947 gcc_assert (TARGET_64BIT
);
12948 fputs ("rip", file
);
12952 if (code
== 'y' && STACK_TOP_P (x
))
12954 fputs ("st(0)", file
);
12960 else if (code
== 'b')
12962 else if (code
== 'k')
12964 else if (code
== 'q')
12966 else if (code
== 'h')
12968 else if (code
== 'x')
12970 else if (code
== 't')
12972 else if (code
== 'g')
12975 msize
= GET_MODE_SIZE (GET_MODE (x
));
12979 if (regno
== ARG_POINTER_REGNUM
12980 || regno
== FRAME_POINTER_REGNUM
12981 || regno
== FPSR_REG
)
12983 output_operand_lossage
12984 ("invalid use of register '%s'", reg_names
[regno
]);
12987 else if (regno
== FLAGS_REG
)
12989 output_operand_lossage ("invalid use of asm flag output");
12995 if (GENERAL_REGNO_P (regno
))
12996 msize
= GET_MODE_SIZE (word_mode
);
12998 error ("%<V%> modifier on non-integer register");
13001 duplicated
= code
== 'd' && TARGET_AVX
;
13008 if (GENERAL_REGNO_P (regno
) && msize
> GET_MODE_SIZE (word_mode
))
13009 warning (0, "unsupported size for integer register");
13012 if (LEGACY_INT_REGNO_P (regno
))
13013 putc (msize
> 4 && TARGET_64BIT
? 'r' : 'e', file
);
13017 reg
= hi_reg_name
[regno
];
13020 if (regno
>= ARRAY_SIZE (qi_reg_name
))
13022 if (!ANY_QI_REGNO_P (regno
))
13023 error ("unsupported size for integer register");
13024 reg
= qi_reg_name
[regno
];
13027 if (regno
>= ARRAY_SIZE (qi_high_reg_name
))
13029 reg
= qi_high_reg_name
[regno
];
13033 if (SSE_REGNO_P (regno
))
13035 gcc_assert (!duplicated
);
13036 putc (msize
== 32 ? 'y' : 'z', file
);
13037 reg
= hi_reg_name
[regno
] + 1;
13042 gcc_unreachable ();
13047 /* Irritatingly, AMD extended registers use
13048 different naming convention: "r%d[bwd]" */
13049 if (REX_INT_REGNO_P (regno
))
13051 gcc_assert (TARGET_64BIT
);
13055 error ("extended registers have no high halves");
13070 error ("unsupported operand size for extended register");
13078 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13079 fprintf (file
, ", %%%s", reg
);
13081 fprintf (file
, ", %s", reg
);
13085 /* Meaning of CODE:
13086 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13087 C -- print opcode suffix for set/cmov insn.
13088 c -- like C, but print reversed condition
13089 F,f -- likewise, but for floating-point.
13090 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13092 R -- print embedded rounding and sae.
13093 r -- print only sae.
13094 z -- print the opcode suffix for the size of the current operand.
13095 Z -- likewise, with special suffixes for x87 instructions.
13096 * -- print a star (in certain assembler syntax)
13097 A -- print an absolute memory reference.
13098 E -- print address with DImode register names if TARGET_64BIT.
13099 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13100 s -- print a shift double count, followed by the assemblers argument
13102 b -- print the QImode name of the register for the indicated operand.
13103 %b0 would print %al if operands[0] is reg 0.
13104 w -- likewise, print the HImode name of the register.
13105 k -- likewise, print the SImode name of the register.
13106 q -- likewise, print the DImode name of the register.
13107 x -- likewise, print the V4SFmode name of the register.
13108 t -- likewise, print the V8SFmode name of the register.
13109 g -- likewise, print the V16SFmode name of the register.
13110 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13111 y -- print "st(0)" instead of "st" as a register.
13112 d -- print duplicated register operand for AVX instruction.
13113 D -- print condition for SSE cmp instruction.
13114 P -- if PIC, print an @PLT suffix. For -fno-plt, load function
13116 p -- print raw symbol name.
13117 X -- don't print any sort of PIC '@' suffix for a symbol.
13118 & -- print some in-use local-dynamic symbol name.
13119 H -- print a memory address offset by 8; used for sse high-parts
13120 Y -- print condition for XOP pcom* instruction.
13121 V -- print naked full integer register name without %.
13122 + -- print a branch hint as 'cs' or 'ds' prefix
13123 ; -- print a semicolon (after prefixes due to bug in older gas).
13124 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13125 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
13126 M -- print addr32 prefix for TARGET_X32 with VSIB address.
13127 ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
13128 N -- print maskz if it's constant 0 operand.
13132 ix86_print_operand (FILE *file
, rtx x
, int code
)
13139 switch (ASSEMBLER_DIALECT
)
13146 /* Intel syntax. For absolute addresses, registers should not
13147 be surrounded by braces. */
13151 ix86_print_operand (file
, x
, 0);
13158 gcc_unreachable ();
13161 ix86_print_operand (file
, x
, 0);
13165 /* Wrap address in an UNSPEC to declare special handling. */
13167 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
13169 output_address (VOIDmode
, x
);
13173 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13178 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13183 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13188 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13193 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13198 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13203 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13204 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
13207 switch (GET_MODE_SIZE (GET_MODE (x
)))
13222 output_operand_lossage ("invalid operand size for operand "
13232 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
13234 /* Opcodes don't get size suffixes if using Intel opcodes. */
13235 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
13238 switch (GET_MODE_SIZE (GET_MODE (x
)))
13257 output_operand_lossage ("invalid operand size for operand "
13263 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
13265 if (this_is_asm_operands
)
13266 warning_for_asm (this_is_asm_operands
,
13267 "non-integer operand used with operand code %<z%>");
13269 warning (0, "non-integer operand used with operand code %<z%>");
13274 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13275 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
13278 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
13280 switch (GET_MODE_SIZE (GET_MODE (x
)))
13283 #ifdef HAVE_AS_IX86_FILDS
13293 #ifdef HAVE_AS_IX86_FILDQ
13296 fputs ("ll", file
);
13304 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
13306 /* 387 opcodes don't get size suffixes
13307 if the operands are registers. */
13308 if (STACK_REG_P (x
))
13311 switch (GET_MODE_SIZE (GET_MODE (x
)))
13332 output_operand_lossage ("invalid operand type used with "
13333 "operand code '%c'", code
);
13337 output_operand_lossage ("invalid operand size for operand code '%c'",
13358 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
13360 ix86_print_operand (file
, x
, 0);
13361 fputs (", ", file
);
13366 switch (GET_CODE (x
))
13369 fputs ("neq", file
);
13372 fputs ("eq", file
);
13376 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
13380 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
13384 fputs ("le", file
);
13388 fputs ("lt", file
);
13391 fputs ("unord", file
);
13394 fputs ("ord", file
);
13397 fputs ("ueq", file
);
13400 fputs ("nlt", file
);
13403 fputs ("nle", file
);
13406 fputs ("ule", file
);
13409 fputs ("ult", file
);
13412 fputs ("une", file
);
13415 output_operand_lossage ("operand is not a condition code, "
13416 "invalid operand code 'Y'");
13422 /* Little bit of braindamage here. The SSE compare instructions
13423 does use completely different names for the comparisons that the
13424 fp conditional moves. */
13425 switch (GET_CODE (x
))
13430 fputs ("eq_us", file
);
13435 fputs ("eq", file
);
13440 fputs ("nge", file
);
13445 fputs ("lt", file
);
13450 fputs ("ngt", file
);
13455 fputs ("le", file
);
13458 fputs ("unord", file
);
13463 fputs ("neq_oq", file
);
13468 fputs ("neq", file
);
13473 fputs ("ge", file
);
13478 fputs ("nlt", file
);
13483 fputs ("gt", file
);
13488 fputs ("nle", file
);
13491 fputs ("ord", file
);
13494 output_operand_lossage ("operand is not a condition code, "
13495 "invalid operand code 'D'");
13502 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13503 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13505 gcc_fallthrough ();
13510 if (!COMPARISON_P (x
))
13512 output_operand_lossage ("operand is not a condition code, "
13513 "invalid operand code '%c'", code
);
13516 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
13517 code
== 'c' || code
== 'f',
13518 code
== 'F' || code
== 'f',
13523 if (!offsettable_memref_p (x
))
13525 output_operand_lossage ("operand is not an offsettable memory "
13526 "reference, invalid operand code 'H'");
13529 /* It doesn't actually matter what mode we use here, as we're
13530 only going to use this for printing. */
13531 x
= adjust_address_nv (x
, DImode
, 8);
13532 /* Output 'qword ptr' for intel assembler dialect. */
13533 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
13538 if (!CONST_INT_P (x
))
13540 output_operand_lossage ("operand is not an integer, invalid "
13541 "operand code 'K'");
13545 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
13546 #ifdef HAVE_AS_IX86_HLE
13547 fputs ("xacquire ", file
);
13549 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
13551 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
13552 #ifdef HAVE_AS_IX86_HLE
13553 fputs ("xrelease ", file
);
13555 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
13557 /* We do not want to print value of the operand. */
13561 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
13562 fputs ("{z}", file
);
13566 if (!CONST_INT_P (x
) || INTVAL (x
) != ROUND_SAE
)
13568 output_operand_lossage ("operand is not a specific integer, "
13569 "invalid operand code 'r'");
13573 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
13574 fputs (", ", file
);
13576 fputs ("{sae}", file
);
13578 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13579 fputs (", ", file
);
13584 if (!CONST_INT_P (x
))
13586 output_operand_lossage ("operand is not an integer, invalid "
13587 "operand code 'R'");
13591 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
13592 fputs (", ", file
);
13594 switch (INTVAL (x
))
13596 case ROUND_NEAREST_INT
| ROUND_SAE
:
13597 fputs ("{rn-sae}", file
);
13599 case ROUND_NEG_INF
| ROUND_SAE
:
13600 fputs ("{rd-sae}", file
);
13602 case ROUND_POS_INF
| ROUND_SAE
:
13603 fputs ("{ru-sae}", file
);
13605 case ROUND_ZERO
| ROUND_SAE
:
13606 fputs ("{rz-sae}", file
);
13609 output_operand_lossage ("operand is not a specific integer, "
13610 "invalid operand code 'R'");
13613 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13614 fputs (", ", file
);
13619 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13625 const char *name
= get_some_local_dynamic_name ();
13627 output_operand_lossage ("'%%&' used without any "
13628 "local dynamic TLS references");
13630 assemble_name (file
, name
);
13639 || optimize_function_for_size_p (cfun
)
13640 || !TARGET_BRANCH_PREDICTION_HINTS
)
13643 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
13646 int pred_val
= profile_probability::from_reg_br_prob_note
13647 (XINT (x
, 0)).to_reg_br_prob_base ();
13649 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
13650 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
13652 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
13654 = final_forward_branch_p (current_output_insn
) == 0;
13656 /* Emit hints only in the case default branch prediction
13657 heuristics would fail. */
13658 if (taken
!= cputaken
)
13660 /* We use 3e (DS) prefix for taken branches and
13661 2e (CS) prefix for not taken branches. */
13663 fputs ("ds ; ", file
);
13665 fputs ("cs ; ", file
);
13673 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13679 putc (TARGET_AVX2
? 'i' : 'f', file
);
13685 /* NB: 32-bit indices in VSIB address are sign-extended
13686 to 64 bits. In x32, if 32-bit address 0xf7fa3010 is
13687 sign-extended to 0xfffffffff7fa3010 which is invalid
13688 address. Add addr32 prefix if there is no base
13689 register nor symbol. */
13691 struct ix86_address parts
;
13692 ok
= ix86_decompose_address (x
, &parts
);
13693 gcc_assert (ok
&& parts
.index
== NULL_RTX
);
13694 if (parts
.base
== NULL_RTX
13695 && (parts
.disp
== NULL_RTX
13696 || !symbolic_operand (parts
.disp
,
13697 GET_MODE (parts
.disp
))))
13698 fputs ("addr32 ", file
);
13703 if (TARGET_64BIT
&& Pmode
!= word_mode
)
13704 fputs ("addr32 ", file
);
13708 if (ix86_notrack_prefixed_insn_p (current_output_insn
))
13709 fputs ("notrack ", file
);
13713 output_operand_lossage ("invalid operand code '%c'", code
);
13718 print_reg (x
, code
, file
);
13720 else if (MEM_P (x
))
13722 rtx addr
= XEXP (x
, 0);
13724 /* No `byte ptr' prefix for call instructions ... */
13725 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
13727 machine_mode mode
= GET_MODE (x
);
13730 /* Check for explicit size override codes. */
13733 else if (code
== 'w')
13735 else if (code
== 'k')
13737 else if (code
== 'q')
13739 else if (code
== 'x')
13741 else if (code
== 't')
13743 else if (code
== 'g')
13745 else if (mode
== BLKmode
)
13746 /* ... or BLKmode operands, when not overridden. */
13749 switch (GET_MODE_SIZE (mode
))
13751 case 1: size
= "BYTE"; break;
13752 case 2: size
= "WORD"; break;
13753 case 4: size
= "DWORD"; break;
13754 case 8: size
= "QWORD"; break;
13755 case 12: size
= "TBYTE"; break;
13757 if (mode
== XFmode
)
13762 case 32: size
= "YMMWORD"; break;
13763 case 64: size
= "ZMMWORD"; break;
13765 gcc_unreachable ();
13769 fputs (size
, file
);
13770 fputs (" PTR ", file
);
13774 if (this_is_asm_operands
&& ! address_operand (addr
, VOIDmode
))
13775 output_operand_lossage ("invalid constraints for operand");
13777 ix86_print_operand_address_as
13778 (file
, addr
, MEM_ADDR_SPACE (x
), code
== 'p' || code
== 'P');
13781 else if (CONST_DOUBLE_P (x
) && GET_MODE (x
) == HFmode
)
13783 long l
= real_to_target (NULL
, CONST_DOUBLE_REAL_VALUE (x
),
13784 REAL_MODE_FORMAT (HFmode
));
13785 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13787 fprintf (file
, "0x%04x", (unsigned int) l
);
13790 else if (CONST_DOUBLE_P (x
) && GET_MODE (x
) == SFmode
)
13794 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
13796 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13798 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13800 fprintf (file
, "0x%08" HOST_LONG_LONG_FORMAT
"x",
13801 (unsigned long long) (int) l
);
13803 fprintf (file
, "0x%08x", (unsigned int) l
);
13806 else if (CONST_DOUBLE_P (x
) && GET_MODE (x
) == DFmode
)
13810 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
13812 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13814 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
13817 /* These float cases don't actually occur as immediate operands. */
13818 else if (CONST_DOUBLE_P (x
) && GET_MODE (x
) == XFmode
)
13822 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
13823 fputs (dstr
, file
);
13826 /* Print bcst_mem_operand. */
13827 else if (GET_CODE (x
) == VEC_DUPLICATE
)
13829 machine_mode vmode
= GET_MODE (x
);
13830 /* Must be bcst_memory_operand. */
13831 gcc_assert (bcst_mem_operand (x
, vmode
));
13833 rtx mem
= XEXP (x
,0);
13834 ix86_print_operand (file
, mem
, 0);
13840 fputs ("{1to2}", file
);
13846 fputs ("{1to4}", file
);
13853 fputs ("{1to8}", file
);
13858 fputs ("{1to16}", file
);
13861 fputs ("{1to32}", file
);
13864 gcc_unreachable ();
13870 /* We have patterns that allow zero sets of memory, for instance.
13871 In 64-bit mode, we should probably support all 8-byte vectors,
13872 since we can in fact encode that into an immediate. */
13873 if (GET_CODE (x
) == CONST_VECTOR
)
13875 if (x
!= CONST0_RTX (GET_MODE (x
)))
13876 output_operand_lossage ("invalid vector immediate");
13882 if (ix86_force_load_from_GOT_p (x
, true))
13884 /* For inline assembly statement, load function address
13885 from GOT with 'P' operand modifier to avoid PLT. */
13886 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
13890 x
= gen_rtx_CONST (Pmode
, x
);
13891 x
= gen_const_mem (Pmode
, x
);
13892 ix86_print_operand (file
, x
, 'A');
13896 else if (code
!= 'p')
13898 if (CONST_INT_P (x
))
13900 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13903 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
13904 || GET_CODE (x
) == LABEL_REF
)
13906 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13909 fputs ("OFFSET FLAT:", file
);
13912 if (CONST_INT_P (x
))
13913 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
13914 else if (flag_pic
|| MACHOPIC_INDIRECT
)
13915 output_pic_addr_const (file
, x
, code
);
13917 output_addr_const (file
, x
);
13922 ix86_print_operand_punct_valid_p (unsigned char code
)
13924 return (code
== '*' || code
== '+' || code
== '&' || code
== ';'
13925 || code
== '~' || code
== '^' || code
== '!');
13928 /* Print a memory operand whose address is ADDR. */
13931 ix86_print_operand_address_as (FILE *file
, rtx addr
,
13932 addr_space_t as
, bool raw
)
13934 struct ix86_address parts
;
13935 rtx base
, index
, disp
;
13941 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
13943 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
13944 gcc_assert (parts
.index
== NULL_RTX
);
13945 parts
.index
= XVECEXP (addr
, 0, 1);
13946 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
13947 addr
= XVECEXP (addr
, 0, 0);
13950 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
13952 gcc_assert (TARGET_64BIT
);
13953 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
13957 ok
= ix86_decompose_address (addr
, &parts
);
13962 index
= parts
.index
;
13964 scale
= parts
.scale
;
13966 if (ADDR_SPACE_GENERIC_P (as
))
13969 gcc_assert (ADDR_SPACE_GENERIC_P (parts
.seg
));
13971 if (!ADDR_SPACE_GENERIC_P (as
) && !raw
)
13973 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13978 case ADDR_SPACE_SEG_FS
:
13979 fputs ("fs:", file
);
13981 case ADDR_SPACE_SEG_GS
:
13982 fputs ("gs:", file
);
13985 gcc_unreachable ();
13989 /* Use one byte shorter RIP relative addressing for 64bit mode. */
13990 if (TARGET_64BIT
&& !base
&& !index
&& !raw
)
13994 if (GET_CODE (disp
) == CONST
13995 && GET_CODE (XEXP (disp
, 0)) == PLUS
13996 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
13997 symbol
= XEXP (XEXP (disp
, 0), 0);
13999 if (GET_CODE (symbol
) == LABEL_REF
14000 || (GET_CODE (symbol
) == SYMBOL_REF
14001 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
14005 if (!base
&& !index
)
14007 /* Displacement only requires special attention. */
14008 if (CONST_INT_P (disp
))
14010 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& ADDR_SPACE_GENERIC_P (as
))
14011 fputs ("ds:", file
);
14012 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
14014 /* Load the external function address via the GOT slot to avoid PLT. */
14015 else if (GET_CODE (disp
) == CONST
14016 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
14017 && (XINT (XEXP (disp
, 0), 1) == UNSPEC_GOTPCREL
14018 || XINT (XEXP (disp
, 0), 1) == UNSPEC_GOT
)
14019 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp
, 0), 0, 0)))
14020 output_pic_addr_const (file
, disp
, 0);
14022 output_pic_addr_const (file
, disp
, 0);
14024 output_addr_const (file
, disp
);
14028 /* Print SImode register names to force addr32 prefix. */
14029 if (SImode_address_operand (addr
, VOIDmode
))
14033 gcc_assert (TARGET_64BIT
);
14034 switch (GET_CODE (addr
))
14037 gcc_assert (GET_MODE (addr
) == SImode
);
14038 gcc_assert (GET_MODE (SUBREG_REG (addr
)) == DImode
);
14042 gcc_assert (GET_MODE (addr
) == DImode
);
14045 gcc_unreachable ();
14048 gcc_assert (!code
);
14054 && CONST_INT_P (disp
)
14055 && INTVAL (disp
) < -16*1024*1024)
14057 /* X32 runs in 64-bit mode, where displacement, DISP, in
14058 address DISP(%r64), is encoded as 32-bit immediate sign-
14059 extended from 32-bit to 64-bit. For -0x40000300(%r64),
14060 address is %r64 + 0xffffffffbffffd00. When %r64 <
14061 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
14062 which is invalid for x32. The correct address is %r64
14063 - 0x40000300 == 0xf7ffdd64. To properly encode
14064 -0x40000300(%r64) for x32, we zero-extend negative
14065 displacement by forcing addr32 prefix which truncates
14066 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
14067 zero-extend all negative displacements, including -1(%rsp).
14068 However, for small negative displacements, sign-extension
14069 won't cause overflow. We only zero-extend negative
14070 displacements if they < -16*1024*1024, which is also used
14071 to check legitimate address displacements for PIC. */
14075 /* Since the upper 32 bits of RSP are always zero for x32,
14076 we can encode %esp as %rsp to avoid 0x67 prefix if
14077 there is no index register. */
14078 if (TARGET_X32
&& Pmode
== SImode
14079 && !index
&& base
&& REG_P (base
) && REGNO (base
) == SP_REG
)
14082 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14087 output_pic_addr_const (file
, disp
, 0);
14088 else if (GET_CODE (disp
) == LABEL_REF
)
14089 output_asm_label (disp
);
14091 output_addr_const (file
, disp
);
14096 print_reg (base
, code
, file
);
14100 print_reg (index
, vsib
? 0 : code
, file
);
14101 if (scale
!= 1 || vsib
)
14102 fprintf (file
, ",%d", scale
);
14108 rtx offset
= NULL_RTX
;
14112 /* Pull out the offset of a symbol; print any symbol itself. */
14113 if (GET_CODE (disp
) == CONST
14114 && GET_CODE (XEXP (disp
, 0)) == PLUS
14115 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14117 offset
= XEXP (XEXP (disp
, 0), 1);
14118 disp
= gen_rtx_CONST (VOIDmode
,
14119 XEXP (XEXP (disp
, 0), 0));
14123 output_pic_addr_const (file
, disp
, 0);
14124 else if (GET_CODE (disp
) == LABEL_REF
)
14125 output_asm_label (disp
);
14126 else if (CONST_INT_P (disp
))
14129 output_addr_const (file
, disp
);
14135 print_reg (base
, code
, file
);
14138 if (INTVAL (offset
) >= 0)
14140 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14144 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14151 print_reg (index
, vsib
? 0 : code
, file
);
14152 if (scale
!= 1 || vsib
)
14153 fprintf (file
, "*%d", scale
);
14161 ix86_print_operand_address (FILE *file
, machine_mode
/*mode*/, rtx addr
)
14163 if (this_is_asm_operands
&& ! address_operand (addr
, VOIDmode
))
14164 output_operand_lossage ("invalid constraints for operand");
14166 ix86_print_operand_address_as (file
, addr
, ADDR_SPACE_GENERIC
, false);
14169 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14172 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
14176 if (GET_CODE (x
) != UNSPEC
)
14179 op
= XVECEXP (x
, 0, 0);
14180 switch (XINT (x
, 1))
14182 case UNSPEC_GOTOFF
:
14183 output_addr_const (file
, op
);
14184 fputs ("@gotoff", file
);
14186 case UNSPEC_GOTTPOFF
:
14187 output_addr_const (file
, op
);
14188 /* FIXME: This might be @TPOFF in Sun ld. */
14189 fputs ("@gottpoff", file
);
14192 output_addr_const (file
, op
);
14193 fputs ("@tpoff", file
);
14195 case UNSPEC_NTPOFF
:
14196 output_addr_const (file
, op
);
14198 fputs ("@tpoff", file
);
14200 fputs ("@ntpoff", file
);
14202 case UNSPEC_DTPOFF
:
14203 output_addr_const (file
, op
);
14204 fputs ("@dtpoff", file
);
14206 case UNSPEC_GOTNTPOFF
:
14207 output_addr_const (file
, op
);
14209 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
14210 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
14212 fputs ("@gotntpoff", file
);
14214 case UNSPEC_INDNTPOFF
:
14215 output_addr_const (file
, op
);
14216 fputs ("@indntpoff", file
);
14219 case UNSPEC_MACHOPIC_OFFSET
:
14220 output_addr_const (file
, op
);
14222 machopic_output_function_base_name (file
);
14234 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14235 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14236 is the expression of the binary operation. The output may either be
14237 emitted here, or returned to the caller, like all output_* functions.
14239 There is no guarantee that the operands are the same mode, as they
14240 might be within FLOAT or FLOAT_EXTEND expressions. */
14242 #ifndef SYSV386_COMPAT
14243 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14244 wants to fix the assemblers because that causes incompatibility
14245 with gcc. No-one wants to fix gcc because that causes
14246 incompatibility with assemblers... You can use the option of
14247 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14248 #define SYSV386_COMPAT 1
14252 output_387_binary_op (rtx_insn
*insn
, rtx
*operands
)
14254 static char buf
[40];
14257 = (SSE_REG_P (operands
[0])
14258 || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]));
14262 else if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14263 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14270 switch (GET_CODE (operands
[3]))
14281 gcc_unreachable ();
14288 p
= GET_MODE (operands
[0]) == SFmode
? "ss" : "sd";
14292 p
= "\t{%2, %1, %0|%0, %1, %2}";
14294 p
= "\t{%2, %0|%0, %2}";
14300 /* Even if we do not want to check the inputs, this documents input
14301 constraints. Which helps in understanding the following code. */
14304 if (STACK_REG_P (operands
[0])
14305 && ((REG_P (operands
[1])
14306 && REGNO (operands
[0]) == REGNO (operands
[1])
14307 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
14308 || (REG_P (operands
[2])
14309 && REGNO (operands
[0]) == REGNO (operands
[2])
14310 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
14311 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
14314 gcc_unreachable ();
14317 switch (GET_CODE (operands
[3]))
14321 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
14322 std::swap (operands
[1], operands
[2]);
14324 /* know operands[0] == operands[1]. */
14326 if (MEM_P (operands
[2]))
14332 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
14334 if (STACK_TOP_P (operands
[0]))
14335 /* How is it that we are storing to a dead operand[2]?
14336 Well, presumably operands[1] is dead too. We can't
14337 store the result to st(0) as st(0) gets popped on this
14338 instruction. Instead store to operands[2] (which I
14339 think has to be st(1)). st(1) will be popped later.
14340 gcc <= 2.8.1 didn't have this check and generated
14341 assembly code that the Unixware assembler rejected. */
14342 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14344 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14348 if (STACK_TOP_P (operands
[0]))
14349 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14351 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14356 if (MEM_P (operands
[1]))
14362 if (MEM_P (operands
[2]))
14368 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
14371 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14372 derived assemblers, confusingly reverse the direction of
14373 the operation for fsub{r} and fdiv{r} when the
14374 destination register is not st(0). The Intel assembler
14375 doesn't have this brain damage. Read !SYSV386_COMPAT to
14376 figure out what the hardware really does. */
14377 if (STACK_TOP_P (operands
[0]))
14378 p
= "{p\t%0, %2|rp\t%2, %0}";
14380 p
= "{rp\t%2, %0|p\t%0, %2}";
14382 if (STACK_TOP_P (operands
[0]))
14383 /* As above for fmul/fadd, we can't store to st(0). */
14384 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14386 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14391 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
14394 if (STACK_TOP_P (operands
[0]))
14395 p
= "{rp\t%0, %1|p\t%1, %0}";
14397 p
= "{p\t%1, %0|rp\t%0, %1}";
14399 if (STACK_TOP_P (operands
[0]))
14400 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14402 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14407 if (STACK_TOP_P (operands
[0]))
14409 if (STACK_TOP_P (operands
[1]))
14410 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14412 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14415 else if (STACK_TOP_P (operands
[1]))
14418 p
= "{\t%1, %0|r\t%0, %1}";
14420 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14426 p
= "{r\t%2, %0|\t%0, %2}";
14428 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14434 gcc_unreachable ();
14441 /* Return needed mode for entity in optimize_mode_switching pass. */
14444 ix86_dirflag_mode_needed (rtx_insn
*insn
)
14448 if (cfun
->machine
->func_type
== TYPE_NORMAL
)
14449 return X86_DIRFLAG_ANY
;
14451 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
14452 return TARGET_CLD
? X86_DIRFLAG_ANY
: X86_DIRFLAG_RESET
;
14455 if (recog_memoized (insn
) < 0)
14456 return X86_DIRFLAG_ANY
;
14458 if (get_attr_type (insn
) == TYPE_STR
)
14460 /* Emit cld instruction if stringops are used in the function. */
14461 if (cfun
->machine
->func_type
== TYPE_NORMAL
)
14462 return TARGET_CLD
? X86_DIRFLAG_RESET
: X86_DIRFLAG_ANY
;
14464 return X86_DIRFLAG_RESET
;
14467 return X86_DIRFLAG_ANY
;
14470 /* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */
14473 ix86_check_avx_upper_register (const_rtx exp
)
14475 return (SSE_REG_P (exp
)
14476 && !EXT_REX_SSE_REG_P (exp
)
14477 && GET_MODE_BITSIZE (GET_MODE (exp
)) > 128);
14480 /* Check if a 256bit or 512bit AVX register is referenced in stores. */
14483 ix86_check_avx_upper_stores (rtx dest
, const_rtx
, void *data
)
14485 if (ix86_check_avx_upper_register (dest
))
14487 bool *used
= (bool *) data
;
14492 /* Return needed mode for entity in optimize_mode_switching pass. */
14495 ix86_avx_u128_mode_needed (rtx_insn
*insn
)
14497 if (DEBUG_INSN_P (insn
))
14498 return AVX_U128_ANY
;
14504 /* Needed mode is set to AVX_U128_CLEAN if there are
14505 no 256bit or 512bit modes used in function arguments. */
14506 for (link
= CALL_INSN_FUNCTION_USAGE (insn
);
14508 link
= XEXP (link
, 1))
14510 if (GET_CODE (XEXP (link
, 0)) == USE
)
14512 rtx arg
= XEXP (XEXP (link
, 0), 0);
14514 if (ix86_check_avx_upper_register (arg
))
14515 return AVX_U128_DIRTY
;
14519 /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit
14520 nor 512bit registers used in the function return register. */
14521 bool avx_upper_reg_found
= false;
14522 note_stores (insn
, ix86_check_avx_upper_stores
,
14523 &avx_upper_reg_found
);
14524 if (avx_upper_reg_found
)
14525 return AVX_U128_DIRTY
;
14527 /* If the function is known to preserve some SSE registers,
14528 RA and previous passes can legitimately rely on that for
14529 modes wider than 256 bits. It's only safe to issue a
14530 vzeroupper if all SSE registers are clobbered. */
14531 const function_abi
&abi
= insn_callee_abi (insn
);
14532 if (vzeroupper_pattern (PATTERN (insn
), VOIDmode
)
14533 || !hard_reg_set_subset_p (reg_class_contents
[SSE_REGS
],
14534 abi
.mode_clobbers (V4DImode
)))
14535 return AVX_U128_ANY
;
14537 return AVX_U128_CLEAN
;
14540 subrtx_iterator::array_type array
;
14542 rtx set
= single_set (insn
);
14545 rtx dest
= SET_DEST (set
);
14546 rtx src
= SET_SRC (set
);
14547 if (ix86_check_avx_upper_register (dest
))
14549 /* This is an YMM/ZMM load. Return AVX_U128_DIRTY if the
14550 source isn't zero. */
14551 if (standard_sse_constant_p (src
, GET_MODE (dest
)) != 1)
14552 return AVX_U128_DIRTY
;
14554 return AVX_U128_ANY
;
14558 FOR_EACH_SUBRTX (iter
, array
, src
, NONCONST
)
14559 if (ix86_check_avx_upper_register (*iter
))
14560 return AVX_U128_DIRTY
;
14563 /* This isn't YMM/ZMM load/store. */
14564 return AVX_U128_ANY
;
14567 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
14568 Hardware changes state only when a 256bit register is written to,
14569 but we need to prevent the compiler from moving optimal insertion
14570 point above eventual read from 256bit or 512 bit register. */
14571 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), NONCONST
)
14572 if (ix86_check_avx_upper_register (*iter
))
14573 return AVX_U128_DIRTY
;
14575 return AVX_U128_ANY
;
14578 /* Return mode that i387 must be switched into
14579 prior to the execution of insn. */
14582 ix86_i387_mode_needed (int entity
, rtx_insn
*insn
)
14584 enum attr_i387_cw mode
;
14586 /* The mode UNINITIALIZED is used to store control word after a
14587 function call or ASM pattern. The mode ANY specify that function
14588 has no requirements on the control word and make no changes in the
14589 bits we are interested in. */
14592 || (NONJUMP_INSN_P (insn
)
14593 && (asm_noperands (PATTERN (insn
)) >= 0
14594 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
14595 return I387_CW_UNINITIALIZED
;
14597 if (recog_memoized (insn
) < 0)
14598 return I387_CW_ANY
;
14600 mode
= get_attr_i387_cw (insn
);
14604 case I387_ROUNDEVEN
:
14605 if (mode
== I387_CW_ROUNDEVEN
)
14610 if (mode
== I387_CW_TRUNC
)
14615 if (mode
== I387_CW_FLOOR
)
14620 if (mode
== I387_CW_CEIL
)
14625 gcc_unreachable ();
14628 return I387_CW_ANY
;
14631 /* Return mode that entity must be switched into
14632 prior to the execution of insn. */
14635 ix86_mode_needed (int entity
, rtx_insn
*insn
)
14640 return ix86_dirflag_mode_needed (insn
);
14642 return ix86_avx_u128_mode_needed (insn
);
14643 case I387_ROUNDEVEN
:
14647 return ix86_i387_mode_needed (entity
, insn
);
14649 gcc_unreachable ();
14654 /* Calculate mode of upper 128bit AVX registers after the insn. */
14657 ix86_avx_u128_mode_after (int mode
, rtx_insn
*insn
)
14659 rtx pat
= PATTERN (insn
);
14661 if (vzeroupper_pattern (pat
, VOIDmode
)
14662 || vzeroall_pattern (pat
, VOIDmode
))
14663 return AVX_U128_CLEAN
;
14665 /* We know that state is clean after CALL insn if there are no
14666 256bit or 512bit registers used in the function return register. */
14669 bool avx_upper_reg_found
= false;
14670 note_stores (insn
, ix86_check_avx_upper_stores
, &avx_upper_reg_found
);
14672 return avx_upper_reg_found
? AVX_U128_DIRTY
: AVX_U128_CLEAN
;
14675 /* Otherwise, return current mode. Remember that if insn
14676 references AVX 256bit or 512bit registers, the mode was already
14677 changed to DIRTY from MODE_NEEDED. */
14681 /* Return the mode that an insn results in. */
14684 ix86_mode_after (int entity
, int mode
, rtx_insn
*insn
)
14691 return ix86_avx_u128_mode_after (mode
, insn
);
14692 case I387_ROUNDEVEN
:
14698 gcc_unreachable ();
14703 ix86_dirflag_mode_entry (void)
14705 /* For TARGET_CLD or in the interrupt handler we can't assume
14706 direction flag state at function entry. */
14708 || cfun
->machine
->func_type
!= TYPE_NORMAL
)
14709 return X86_DIRFLAG_ANY
;
14711 return X86_DIRFLAG_RESET
;
14715 ix86_avx_u128_mode_entry (void)
14719 /* Entry mode is set to AVX_U128_DIRTY if there are
14720 256bit or 512bit modes used in function arguments. */
14721 for (arg
= DECL_ARGUMENTS (current_function_decl
); arg
;
14722 arg
= TREE_CHAIN (arg
))
14724 rtx incoming
= DECL_INCOMING_RTL (arg
);
14726 if (incoming
&& ix86_check_avx_upper_register (incoming
))
14727 return AVX_U128_DIRTY
;
14730 return AVX_U128_CLEAN
;
14733 /* Return a mode that ENTITY is assumed to be
14734 switched to at function entry. */
14737 ix86_mode_entry (int entity
)
14742 return ix86_dirflag_mode_entry ();
14744 return ix86_avx_u128_mode_entry ();
14745 case I387_ROUNDEVEN
:
14749 return I387_CW_ANY
;
14751 gcc_unreachable ();
14756 ix86_avx_u128_mode_exit (void)
14758 rtx reg
= crtl
->return_rtx
;
14760 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
14761 or 512 bit modes used in the function return register. */
14762 if (reg
&& ix86_check_avx_upper_register (reg
))
14763 return AVX_U128_DIRTY
;
14765 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
14766 modes used in function arguments, otherwise return AVX_U128_CLEAN.
14768 return ix86_avx_u128_mode_entry ();
14771 /* Return a mode that ENTITY is assumed to be
14772 switched to at function exit. */
14775 ix86_mode_exit (int entity
)
14780 return X86_DIRFLAG_ANY
;
14782 return ix86_avx_u128_mode_exit ();
14783 case I387_ROUNDEVEN
:
14787 return I387_CW_ANY
;
14789 gcc_unreachable ();
14794 ix86_mode_priority (int, int n
)
14799 /* Output code to initialize control word copies used by trunc?f?i and
14800 rounding patterns. CURRENT_MODE is set to current control word,
14801 while NEW_MODE is set to new control word. */
14804 emit_i387_cw_initialization (int mode
)
14806 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
14809 enum ix86_stack_slot slot
;
14811 rtx reg
= gen_reg_rtx (HImode
);
14813 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
14814 emit_move_insn (reg
, copy_rtx (stored_mode
));
14818 case I387_CW_ROUNDEVEN
:
14819 /* round to nearest */
14820 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
14821 slot
= SLOT_CW_ROUNDEVEN
;
14824 case I387_CW_TRUNC
:
14825 /* round toward zero (truncate) */
14826 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
14827 slot
= SLOT_CW_TRUNC
;
14830 case I387_CW_FLOOR
:
14831 /* round down toward -oo */
14832 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
14833 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
14834 slot
= SLOT_CW_FLOOR
;
14838 /* round up toward +oo */
14839 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
14840 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
14841 slot
= SLOT_CW_CEIL
;
14845 gcc_unreachable ();
14848 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
14850 new_mode
= assign_386_stack_local (HImode
, slot
);
14851 emit_move_insn (new_mode
, reg
);
14854 /* Generate one or more insns to set ENTITY to MODE. */
14857 ix86_emit_mode_set (int entity
, int mode
, int prev_mode ATTRIBUTE_UNUSED
,
14858 HARD_REG_SET regs_live ATTRIBUTE_UNUSED
)
14863 if (mode
== X86_DIRFLAG_RESET
)
14864 emit_insn (gen_cld ());
14867 if (mode
== AVX_U128_CLEAN
)
14868 ix86_expand_avx_vzeroupper ();
14870 case I387_ROUNDEVEN
:
14874 if (mode
!= I387_CW_ANY
14875 && mode
!= I387_CW_UNINITIALIZED
)
14876 emit_i387_cw_initialization (mode
);
14879 gcc_unreachable ();
14883 /* Output code for INSN to convert a float to a signed int. OPERANDS
14884 are the insn operands. The output may be [HSD]Imode and the input
14885 operand may be [SDX]Fmode. */
14888 output_fix_trunc (rtx_insn
*insn
, rtx
*operands
, bool fisttp
)
14890 bool stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
);
14891 bool dimode_p
= GET_MODE (operands
[0]) == DImode
;
14892 int round_mode
= get_attr_i387_cw (insn
);
14894 static char buf
[40];
14897 /* Jump through a hoop or two for DImode, since the hardware has no
14898 non-popping instruction. We used to do this a different way, but
14899 that was somewhat fragile and broke with post-reload splitters. */
14900 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
14901 output_asm_insn ("fld\t%y1", operands
);
14903 gcc_assert (STACK_TOP_P (operands
[1]));
14904 gcc_assert (MEM_P (operands
[0]));
14905 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
14908 return "fisttp%Z0\t%0";
14910 strcpy (buf
, "fist");
14912 if (round_mode
!= I387_CW_ANY
)
14913 output_asm_insn ("fldcw\t%3", operands
);
14916 strcat (buf
, p
+ !(stack_top_dies
|| dimode_p
));
14918 output_asm_insn (buf
, operands
);
14920 if (round_mode
!= I387_CW_ANY
)
14921 output_asm_insn ("fldcw\t%2", operands
);
14926 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14927 have the values zero or one, indicates the ffreep insn's operand
14928 from the OPERANDS array. */
14930 static const char *
14931 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
14933 if (TARGET_USE_FFREEP
)
14934 #ifdef HAVE_AS_IX86_FFREEP
14935 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
14938 static char retval
[32];
14939 int regno
= REGNO (operands
[opno
]);
14941 gcc_assert (STACK_REGNO_P (regno
));
14943 regno
-= FIRST_STACK_REG
;
14945 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
14950 return opno
? "fstp\t%y1" : "fstp\t%y0";
14954 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14955 should be used. UNORDERED_P is true when fucom should be used. */
14958 output_fp_compare (rtx_insn
*insn
, rtx
*operands
,
14959 bool eflags_p
, bool unordered_p
)
14961 rtx
*xops
= eflags_p
? &operands
[0] : &operands
[1];
14962 bool stack_top_dies
;
14964 static char buf
[40];
14967 gcc_assert (STACK_TOP_P (xops
[0]));
14969 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
);
14973 p
= unordered_p
? "fucomi" : "fcomi";
14976 p
= "p\t{%y1, %0|%0, %y1}";
14977 strcat (buf
, p
+ !stack_top_dies
);
14982 if (STACK_REG_P (xops
[1])
14984 && find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
+ 1))
14986 gcc_assert (REGNO (xops
[1]) == FIRST_STACK_REG
+ 1);
14988 /* If both the top of the 387 stack die, and the other operand
14989 is also a stack register that dies, then this must be a
14990 `fcompp' float compare. */
14991 p
= unordered_p
? "fucompp" : "fcompp";
14994 else if (const0_operand (xops
[1], VOIDmode
))
14996 gcc_assert (!unordered_p
);
14997 strcpy (buf
, "ftst");
15001 if (GET_MODE_CLASS (GET_MODE (xops
[1])) == MODE_INT
)
15003 gcc_assert (!unordered_p
);
15007 p
= unordered_p
? "fucom" : "fcom";
15012 strcat (buf
, p
+ !stack_top_dies
);
15015 output_asm_insn (buf
, operands
);
15016 return "fnstsw\t%0";
15020 ix86_output_addr_vec_elt (FILE *file
, int value
)
15022 const char *directive
= ASM_LONG
;
15026 directive
= ASM_QUAD
;
15028 gcc_assert (!TARGET_64BIT
);
15031 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
15035 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
15037 const char *directive
= ASM_LONG
;
15040 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
15041 directive
= ASM_QUAD
;
15043 gcc_assert (!TARGET_64BIT
);
15045 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15046 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
15047 fprintf (file
, "%s%s%d-%s%d\n",
15048 directive
, LPREFIX
, value
, LPREFIX
, rel
);
15050 else if (TARGET_MACHO
)
15052 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
15053 machopic_output_function_base_name (file
);
15057 else if (HAVE_AS_GOTOFF_IN_DATA
)
15058 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
15060 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
15061 GOT_SYMBOL_NAME
, LPREFIX
, value
);
15064 #define LEA_MAX_STALL (3)
15065 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
15067 /* Increase given DISTANCE in half-cycles according to
15068 dependencies between PREV and NEXT instructions.
15069 Add 1 half-cycle if there is no dependency and
15070 go to next cycle if there is some dependecy. */
15072 static unsigned int
15073 increase_distance (rtx_insn
*prev
, rtx_insn
*next
, unsigned int distance
)
15077 if (!prev
|| !next
)
15078 return distance
+ (distance
& 1) + 2;
15080 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
15081 return distance
+ 1;
15083 FOR_EACH_INSN_USE (use
, next
)
15084 FOR_EACH_INSN_DEF (def
, prev
)
15085 if (!DF_REF_IS_ARTIFICIAL (def
)
15086 && DF_REF_REGNO (use
) == DF_REF_REGNO (def
))
15087 return distance
+ (distance
& 1) + 2;
15089 return distance
+ 1;
15092 /* Function checks if instruction INSN defines register number
15093 REGNO1 or REGNO2. */
15096 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
15101 FOR_EACH_INSN_DEF (def
, insn
)
15102 if (DF_REF_REG_DEF_P (def
)
15103 && !DF_REF_IS_ARTIFICIAL (def
)
15104 && (regno1
== DF_REF_REGNO (def
)
15105 || regno2
== DF_REF_REGNO (def
)))
15111 /* Function checks if instruction INSN uses register number
15112 REGNO as a part of address expression. */
15115 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
15119 FOR_EACH_INSN_USE (use
, insn
)
15120 if (DF_REF_REG_MEM_P (use
) && regno
== DF_REF_REGNO (use
))
15126 /* Search backward for non-agu definition of register number REGNO1
15127 or register number REGNO2 in basic block starting from instruction
15128 START up to head of basic block or instruction INSN.
15130 Function puts true value into *FOUND var if definition was found
15131 and false otherwise.
15133 Distance in half-cycles between START and found instruction or head
15134 of BB is added to DISTANCE and returned. */
15137 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
15138 rtx_insn
*insn
, int distance
,
15139 rtx_insn
*start
, bool *found
)
15141 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
15142 rtx_insn
*prev
= start
;
15143 rtx_insn
*next
= NULL
;
15149 && distance
< LEA_SEARCH_THRESHOLD
)
15151 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
15153 distance
= increase_distance (prev
, next
, distance
);
15154 if (insn_defines_reg (regno1
, regno2
, prev
))
15156 if (recog_memoized (prev
) < 0
15157 || get_attr_type (prev
) != TYPE_LEA
)
15166 if (prev
== BB_HEAD (bb
))
15169 prev
= PREV_INSN (prev
);
15175 /* Search backward for non-agu definition of register number REGNO1
15176 or register number REGNO2 in INSN's basic block until
15177 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15178 2. Reach neighbor BBs boundary, or
15179 3. Reach agu definition.
15180 Returns the distance between the non-agu definition point and INSN.
15181 If no definition point, returns -1. */
15184 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
15187 basic_block bb
= BLOCK_FOR_INSN (insn
);
15189 bool found
= false;
15191 if (insn
!= BB_HEAD (bb
))
15192 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
15193 distance
, PREV_INSN (insn
),
15196 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
15200 bool simple_loop
= false;
15202 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
15205 simple_loop
= true;
15210 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
15212 BB_END (bb
), &found
);
15215 int shortest_dist
= -1;
15216 bool found_in_bb
= false;
15218 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
15221 = distance_non_agu_define_in_bb (regno1
, regno2
,
15227 if (shortest_dist
< 0)
15228 shortest_dist
= bb_dist
;
15229 else if (bb_dist
> 0)
15230 shortest_dist
= MIN (bb_dist
, shortest_dist
);
15236 distance
= shortest_dist
;
15243 return distance
>> 1;
15246 /* Return the distance in half-cycles between INSN and the next
15247 insn that uses register number REGNO in memory address added
15248 to DISTANCE. Return -1 if REGNO0 is set.
15250 Put true value into *FOUND if register usage was found and
15252 Put true value into *REDEFINED if register redefinition was
15253 found and false otherwise. */
15256 distance_agu_use_in_bb (unsigned int regno
,
15257 rtx_insn
*insn
, int distance
, rtx_insn
*start
,
15258 bool *found
, bool *redefined
)
15260 basic_block bb
= NULL
;
15261 rtx_insn
*next
= start
;
15262 rtx_insn
*prev
= NULL
;
15265 *redefined
= false;
15267 if (start
!= NULL_RTX
)
15269 bb
= BLOCK_FOR_INSN (start
);
15270 if (start
!= BB_HEAD (bb
))
15271 /* If insn and start belong to the same bb, set prev to insn,
15272 so the call to increase_distance will increase the distance
15273 between insns by 1. */
15279 && distance
< LEA_SEARCH_THRESHOLD
)
15281 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
15283 distance
= increase_distance(prev
, next
, distance
);
15284 if (insn_uses_reg_mem (regno
, next
))
15286 /* Return DISTANCE if OP0 is used in memory
15287 address in NEXT. */
15292 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
15294 /* Return -1 if OP0 is set in NEXT. */
15302 if (next
== BB_END (bb
))
15305 next
= NEXT_INSN (next
);
15311 /* Return the distance between INSN and the next insn that uses
15312 register number REGNO0 in memory address. Return -1 if no such
15313 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
15316 distance_agu_use (unsigned int regno0
, rtx_insn
*insn
)
15318 basic_block bb
= BLOCK_FOR_INSN (insn
);
15320 bool found
= false;
15321 bool redefined
= false;
15323 if (insn
!= BB_END (bb
))
15324 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
15326 &found
, &redefined
);
15328 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
15332 bool simple_loop
= false;
15334 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
15337 simple_loop
= true;
15342 distance
= distance_agu_use_in_bb (regno0
, insn
,
15343 distance
, BB_HEAD (bb
),
15344 &found
, &redefined
);
15347 int shortest_dist
= -1;
15348 bool found_in_bb
= false;
15349 bool redefined_in_bb
= false;
15351 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
15354 = distance_agu_use_in_bb (regno0
, insn
,
15355 distance
, BB_HEAD (e
->dest
),
15356 &found_in_bb
, &redefined_in_bb
);
15359 if (shortest_dist
< 0)
15360 shortest_dist
= bb_dist
;
15361 else if (bb_dist
> 0)
15362 shortest_dist
= MIN (bb_dist
, shortest_dist
);
15368 distance
= shortest_dist
;
15372 if (!found
|| redefined
)
15375 return distance
>> 1;
15378 /* Define this macro to tune LEA priority vs ADD, it take effect when
15379 there is a dilemma of choosing LEA or ADD
15380 Negative value: ADD is more preferred than LEA
15382 Positive value: LEA is more preferred than ADD. */
15383 #define IX86_LEA_PRIORITY 0
15385 /* Return true if usage of lea INSN has performance advantage
15386 over a sequence of instructions. Instructions sequence has
15387 SPLIT_COST cycles higher latency than lea latency. */
15390 ix86_lea_outperforms (rtx_insn
*insn
, unsigned int regno0
, unsigned int regno1
,
15391 unsigned int regno2
, int split_cost
, bool has_scale
)
15393 int dist_define
, dist_use
;
15395 /* For Atom processors newer than Bonnell, if using a 2-source or
15396 3-source LEA for non-destructive destination purposes, or due to
15397 wanting ability to use SCALE, the use of LEA is justified. */
15398 if (!TARGET_CPU_P (BONNELL
))
15402 if (split_cost
< 1)
15404 if (regno0
== regno1
|| regno0
== regno2
)
15409 /* Remember recog_data content. */
15410 struct recog_data_d recog_data_save
= recog_data
;
15412 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
15413 dist_use
= distance_agu_use (regno0
, insn
);
15415 /* distance_non_agu_define can call get_attr_type which can call
15416 recog_memoized, restore recog_data back to previous content. */
15417 recog_data
= recog_data_save
;
15419 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
15421 /* If there is no non AGU operand definition, no AGU
15422 operand usage and split cost is 0 then both lea
15423 and non lea variants have same priority. Currently
15424 we prefer lea for 64 bit code and non lea on 32 bit
15426 if (dist_use
< 0 && split_cost
== 0)
15427 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
15432 /* With longer definitions distance lea is more preferable.
15433 Here we change it to take into account splitting cost and
15435 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
15437 /* If there is no use in memory addess then we just check
15438 that split cost exceeds AGU stall. */
15440 return dist_define
> LEA_MAX_STALL
;
15442 /* If this insn has both backward non-agu dependence and forward
15443 agu dependence, the one with short distance takes effect. */
15444 return dist_define
>= dist_use
;
15447 /* Return true if we need to split op0 = op1 + op2 into a sequence of
15448 move and add to avoid AGU stalls. */
15451 ix86_avoid_lea_for_add (rtx_insn
*insn
, rtx operands
[])
15453 unsigned int regno0
, regno1
, regno2
;
15455 /* Check if we need to optimize. */
15456 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
15459 regno0
= true_regnum (operands
[0]);
15460 regno1
= true_regnum (operands
[1]);
15461 regno2
= true_regnum (operands
[2]);
15463 /* We need to split only adds with non destructive
15464 destination operand. */
15465 if (regno0
== regno1
|| regno0
== regno2
)
15468 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1, false);
15471 /* Return true if we should emit lea instruction instead of mov
15475 ix86_use_lea_for_mov (rtx_insn
*insn
, rtx operands
[])
15477 unsigned int regno0
, regno1
;
15479 /* Check if we need to optimize. */
15480 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
15483 /* Use lea for reg to reg moves only. */
15484 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
15487 regno0
= true_regnum (operands
[0]);
15488 regno1
= true_regnum (operands
[1]);
15490 return ix86_lea_outperforms (insn
, regno0
, regno1
, INVALID_REGNUM
, 0, false);
15493 /* Return true if we need to split lea into a sequence of
15494 instructions to avoid AGU stalls during peephole2. */
15497 ix86_avoid_lea_for_addr (rtx_insn
*insn
, rtx operands
[])
15499 unsigned int regno0
, regno1
, regno2
;
15501 struct ix86_address parts
;
15504 /* The "at least two components" test below might not catch simple
15505 move or zero extension insns if parts.base is non-NULL and parts.disp
15506 is const0_rtx as the only components in the address, e.g. if the
15507 register is %rbp or %r13. As this test is much cheaper and moves or
15508 zero extensions are the common case, do this check first. */
15509 if (REG_P (operands
[1])
15510 || (SImode_address_operand (operands
[1], VOIDmode
)
15511 && REG_P (XEXP (operands
[1], 0))))
15514 ok
= ix86_decompose_address (operands
[1], &parts
);
15517 /* There should be at least two components in the address. */
15518 if ((parts
.base
!= NULL_RTX
) + (parts
.index
!= NULL_RTX
)
15519 + (parts
.disp
!= NULL_RTX
) + (parts
.scale
> 1) < 2)
15522 /* We should not split into add if non legitimate pic
15523 operand is used as displacement. */
15524 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
15527 regno0
= true_regnum (operands
[0]) ;
15528 regno1
= INVALID_REGNUM
;
15529 regno2
= INVALID_REGNUM
;
15532 regno1
= true_regnum (parts
.base
);
15534 regno2
= true_regnum (parts
.index
);
15536 /* Use add for a = a + b and a = b + a since it is faster and shorter
15537 than lea for most processors. For the processors like BONNELL, if
15538 the destination register of LEA holds an actual address which will
15539 be used soon, LEA is better and otherwise ADD is better. */
15540 if (!TARGET_CPU_P (BONNELL
)
15541 && parts
.scale
== 1
15542 && (!parts
.disp
|| parts
.disp
== const0_rtx
)
15543 && (regno0
== regno1
|| regno0
== regno2
))
15546 /* Check we need to optimize. */
15547 if (!TARGET_AVOID_LEA_FOR_ADDR
|| optimize_function_for_size_p (cfun
))
15552 /* Compute how many cycles we will add to execution time
15553 if split lea into a sequence of instructions. */
15554 if (parts
.base
|| parts
.index
)
15556 /* Have to use mov instruction if non desctructive
15557 destination form is used. */
15558 if (regno1
!= regno0
&& regno2
!= regno0
)
15561 /* Have to add index to base if both exist. */
15562 if (parts
.base
&& parts
.index
)
15565 /* Have to use shift and adds if scale is 2 or greater. */
15566 if (parts
.scale
> 1)
15568 if (regno0
!= regno1
)
15570 else if (regno2
== regno0
)
15573 split_cost
+= parts
.scale
;
15576 /* Have to use add instruction with immediate if
15577 disp is non zero. */
15578 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
15581 /* Subtract the price of lea. */
15585 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
,
15589 /* Return true if it is ok to optimize an ADD operation to LEA
15590 operation to avoid flag register consumation. For most processors,
15591 ADD is faster than LEA. For the processors like BONNELL, if the
15592 destination register of LEA holds an actual address which will be
15593 used soon, LEA is better and otherwise ADD is better. */
15596 ix86_lea_for_add_ok (rtx_insn
*insn
, rtx operands
[])
15598 unsigned int regno0
= true_regnum (operands
[0]);
15599 unsigned int regno1
= true_regnum (operands
[1]);
15600 unsigned int regno2
= true_regnum (operands
[2]);
15602 /* If a = b + c, (a!=b && a!=c), must use lea form. */
15603 if (regno0
!= regno1
&& regno0
!= regno2
)
15606 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
15609 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0, false);
15612 /* Return true if destination reg of SET_BODY is shift count of
15616 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
15622 /* Retrieve destination of SET_BODY. */
15623 switch (GET_CODE (set_body
))
15626 set_dest
= SET_DEST (set_body
);
15627 if (!set_dest
|| !REG_P (set_dest
))
15631 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
15632 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
15640 /* Retrieve shift count of USE_BODY. */
15641 switch (GET_CODE (use_body
))
15644 shift_rtx
= XEXP (use_body
, 1);
15647 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
15648 if (ix86_dep_by_shift_count_body (set_body
,
15649 XVECEXP (use_body
, 0, i
)))
15657 && (GET_CODE (shift_rtx
) == ASHIFT
15658 || GET_CODE (shift_rtx
) == LSHIFTRT
15659 || GET_CODE (shift_rtx
) == ASHIFTRT
15660 || GET_CODE (shift_rtx
) == ROTATE
15661 || GET_CODE (shift_rtx
) == ROTATERT
))
15663 rtx shift_count
= XEXP (shift_rtx
, 1);
15665 /* Return true if shift count is dest of SET_BODY. */
15666 if (REG_P (shift_count
))
15668 /* Add check since it can be invoked before register
15669 allocation in pre-reload schedule. */
15670 if (reload_completed
15671 && true_regnum (set_dest
) == true_regnum (shift_count
))
15673 else if (REGNO(set_dest
) == REGNO(shift_count
))
15681 /* Return true if destination reg of SET_INSN is shift count of
15685 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
15687 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
15688 PATTERN (use_insn
));
15691 /* Return TRUE or FALSE depending on whether the unary operator meets the
15692 appropriate constraints. */
15695 ix86_unary_operator_ok (enum rtx_code
,
15699 /* If one of operands is memory, source and destination must match. */
15700 if ((MEM_P (operands
[0])
15701 || MEM_P (operands
[1]))
15702 && ! rtx_equal_p (operands
[0], operands
[1]))
15707 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
15708 are ok, keeping in mind the possible movddup alternative. */
15711 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
15713 if (MEM_P (operands
[0]))
15714 return rtx_equal_p (operands
[0], operands
[1 + high
]);
15715 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
15720 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15721 then replicate the value for all elements of the vector
15725 ix86_build_const_vector (machine_mode mode
, bool vect
, rtx value
)
15729 machine_mode scalar_mode
;
15758 n_elt
= GET_MODE_NUNITS (mode
);
15759 v
= rtvec_alloc (n_elt
);
15760 scalar_mode
= GET_MODE_INNER (mode
);
15762 RTVEC_ELT (v
, 0) = value
;
15764 for (i
= 1; i
< n_elt
; ++i
)
15765 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
15767 return gen_rtx_CONST_VECTOR (mode
, v
);
15770 gcc_unreachable ();
15774 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15775 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15776 for an SSE register. If VECT is true, then replicate the mask for
15777 all elements of the vector register. If INVERT is true, then create
15778 a mask excluding the sign bit. */
15781 ix86_build_signbit_mask (machine_mode mode
, bool vect
, bool invert
)
15783 machine_mode vec_mode
, imode
;
15820 vec_mode
= VOIDmode
;
15825 gcc_unreachable ();
15828 machine_mode inner_mode
= GET_MODE_INNER (mode
);
15829 w
= wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode
) - 1,
15830 GET_MODE_BITSIZE (inner_mode
));
15832 w
= wi::bit_not (w
);
15834 /* Force this value into the low part of a fp vector constant. */
15835 mask
= immed_wide_int_const (w
, imode
);
15836 mask
= gen_lowpart (inner_mode
, mask
);
15838 if (vec_mode
== VOIDmode
)
15839 return force_reg (inner_mode
, mask
);
15841 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
15842 return force_reg (vec_mode
, v
);
15845 /* Return HOST_WIDE_INT for const vector OP in MODE. */
15848 ix86_convert_const_vector_to_integer (rtx op
, machine_mode mode
)
15850 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
15851 gcc_unreachable ();
15853 int nunits
= GET_MODE_NUNITS (mode
);
15854 wide_int val
= wi::zero (GET_MODE_BITSIZE (mode
));
15855 machine_mode innermode
= GET_MODE_INNER (mode
);
15856 unsigned int innermode_bits
= GET_MODE_BITSIZE (innermode
);
15866 for (int i
= 0; i
< nunits
; ++i
)
15868 int v
= INTVAL (XVECEXP (op
, 0, i
));
15869 wide_int wv
= wi::shwi (v
, innermode_bits
);
15870 val
= wi::insert (val
, wv
, innermode_bits
* i
, innermode_bits
);
15878 for (int i
= 0; i
< nunits
; ++i
)
15880 rtx x
= XVECEXP (op
, 0, i
);
15881 int v
= real_to_target (NULL
, CONST_DOUBLE_REAL_VALUE (x
),
15882 REAL_MODE_FORMAT (innermode
));
15883 wide_int wv
= wi::shwi (v
, innermode_bits
);
15884 val
= wi::insert (val
, wv
, innermode_bits
* i
, innermode_bits
);
15888 gcc_unreachable ();
15891 return val
.to_shwi ();
15894 /* Return TRUE or FALSE depending on whether the first SET in INSN
15895 has source and destination with matching CC modes, and that the
15896 CC mode is at least as constrained as REQ_MODE. */
15899 ix86_match_ccmode (rtx insn
, machine_mode req_mode
)
15902 machine_mode set_mode
;
15904 set
= PATTERN (insn
);
15905 if (GET_CODE (set
) == PARALLEL
)
15906 set
= XVECEXP (set
, 0, 0);
15907 gcc_assert (GET_CODE (set
) == SET
);
15908 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
15910 set_mode
= GET_MODE (SET_DEST (set
));
15914 if (req_mode
!= CCNOmode
15915 && (req_mode
!= CCmode
15916 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
15920 if (req_mode
== CCGCmode
)
15924 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
15928 if (req_mode
== CCZmode
)
15941 if (set_mode
!= req_mode
)
15946 gcc_unreachable ();
15949 return GET_MODE (SET_SRC (set
)) == set_mode
;
15953 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
15955 machine_mode mode
= GET_MODE (op0
);
15957 if (SCALAR_FLOAT_MODE_P (mode
))
15959 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
15965 /* Only zero flag is needed. */
15966 case EQ
: /* ZF=0 */
15967 case NE
: /* ZF!=0 */
15969 /* Codes needing carry flag. */
15970 case GEU
: /* CF=0 */
15971 case LTU
: /* CF=1 */
15973 /* Detect overflow checks. They need just the carry flag. */
15974 if (GET_CODE (op0
) == PLUS
15975 && (rtx_equal_p (op1
, XEXP (op0
, 0))
15976 || rtx_equal_p (op1
, XEXP (op0
, 1))))
15978 /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
15980 (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
15982 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
15983 where CC_CCC is either CC or CCC. */
15984 else if (code
== LTU
15985 && GET_CODE (op0
) == NEG
15986 && GET_CODE (geu
= XEXP (op0
, 0)) == GEU
15987 && REG_P (XEXP (geu
, 0))
15988 && (GET_MODE (XEXP (geu
, 0)) == CCCmode
15989 || GET_MODE (XEXP (geu
, 0)) == CCmode
)
15990 && REGNO (XEXP (geu
, 0)) == FLAGS_REG
15991 && XEXP (geu
, 1) == const0_rtx
15992 && GET_CODE (op1
) == LTU
15993 && REG_P (XEXP (op1
, 0))
15994 && GET_MODE (XEXP (op1
, 0)) == GET_MODE (XEXP (geu
, 0))
15995 && REGNO (XEXP (op1
, 0)) == FLAGS_REG
15996 && XEXP (op1
, 1) == const0_rtx
)
15998 /* Similarly for *x86_cmc pattern.
15999 Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
16000 and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
16001 It is sufficient to test that the operand modes are CCCmode. */
16002 else if (code
== LTU
16003 && GET_CODE (op0
) == NEG
16004 && GET_CODE (XEXP (op0
, 0)) == LTU
16005 && GET_MODE (XEXP (XEXP (op0
, 0), 0)) == CCCmode
16006 && GET_CODE (op1
) == GEU
16007 && GET_MODE (XEXP (op1
, 0)) == CCCmode
)
16011 case GTU
: /* CF=0 & ZF=0 */
16012 case LEU
: /* CF=1 | ZF=1 */
16014 /* Codes possibly doable only with sign flag when
16015 comparing against zero. */
16016 case GE
: /* SF=OF or SF=0 */
16017 case LT
: /* SF<>OF or SF=1 */
16018 if (op1
== const0_rtx
)
16021 /* For other cases Carry flag is not required. */
16023 /* Codes doable only with sign flag when comparing
16024 against zero, but we miss jump instruction for it
16025 so we need to use relational tests against overflow
16026 that thus needs to be zero. */
16027 case GT
: /* ZF=0 & SF=OF */
16028 case LE
: /* ZF=1 | SF<>OF */
16029 if (op1
== const0_rtx
)
16033 /* strcmp pattern do (use flags) and combine may ask us for proper
16038 gcc_unreachable ();
16042 /* Return TRUE or FALSE depending on whether the ptest instruction
16043 INSN has source and destination with suitable matching CC modes. */
16046 ix86_match_ptest_ccmode (rtx insn
)
16049 machine_mode set_mode
;
16051 set
= PATTERN (insn
);
16052 gcc_assert (GET_CODE (set
) == SET
);
16053 src
= SET_SRC (set
);
16054 gcc_assert (GET_CODE (src
) == UNSPEC
16055 && XINT (src
, 1) == UNSPEC_PTEST
);
16057 set_mode
= GET_MODE (src
);
16058 if (set_mode
!= CCZmode
16059 && set_mode
!= CCCmode
16060 && set_mode
!= CCmode
)
16062 return GET_MODE (SET_DEST (set
)) == set_mode
;
16065 /* Return the fixed registers used for condition codes. */
16068 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
16071 *p2
= INVALID_REGNUM
;
16075 /* If two condition code modes are compatible, return a condition code
16076 mode which is compatible with both. Otherwise, return
16079 static machine_mode
16080 ix86_cc_modes_compatible (machine_mode m1
, machine_mode m2
)
16085 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
16088 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
16089 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
16092 if ((m1
== CCNOmode
&& m2
== CCGOCmode
)
16093 || (m1
== CCGOCmode
&& m2
== CCNOmode
))
16097 && (m2
== CCGCmode
|| m2
== CCGOCmode
|| m2
== CCNOmode
))
16099 else if (m2
== CCZmode
16100 && (m1
== CCGCmode
|| m1
== CCGOCmode
|| m1
== CCNOmode
))
16106 gcc_unreachable ();
16137 /* These are only compatible with themselves, which we already
16143 /* Return strategy to use for floating-point. We assume that fcomi is always
16144 preferrable where available, since that is also true when looking at size
16145 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
16147 enum ix86_fpcmp_strategy
16148 ix86_fp_comparison_strategy (enum rtx_code
)
16150 /* Do fcomi/sahf based test when profitable. */
16153 return IX86_FPCMP_COMI
;
16155 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
16156 return IX86_FPCMP_SAHF
;
16158 return IX86_FPCMP_ARITH
;
16161 /* Convert comparison codes we use to represent FP comparison to integer
16162 code that will result in proper branch. Return UNKNOWN if no such code
16166 ix86_fp_compare_code_to_integer (enum rtx_code code
)
16190 /* Zero extend possibly SImode EXP to Pmode register. */
16192 ix86_zero_extend_to_Pmode (rtx exp
)
16194 return force_reg (Pmode
, convert_to_mode (Pmode
, exp
, 1));
16197 /* Return true if the function is called via PLT. */
16200 ix86_call_use_plt_p (rtx call_op
)
16202 if (SYMBOL_REF_LOCAL_P (call_op
))
16204 if (SYMBOL_REF_DECL (call_op
)
16205 && TREE_CODE (SYMBOL_REF_DECL (call_op
)) == FUNCTION_DECL
)
16207 /* NB: All ifunc functions must be called via PLT. */
16209 = cgraph_node::get (SYMBOL_REF_DECL (call_op
));
16210 if (node
&& node
->ifunc_resolver
)
16218 /* Implement TARGET_IFUNC_REF_LOCAL_OK. If this hook returns true,
16219 the PLT entry will be used as the function address for local IFUNC
16220 functions. When the PIC register is needed for PLT call, indirect
16221 call via the PLT entry will fail since the PIC register may not be
16222 set up properly for indirect call. In this case, we should return
16226 ix86_ifunc_ref_local_ok (void)
16228 return !flag_pic
|| (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
);
16231 /* Return true if the function being called was marked with attribute
16232 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
16233 to handle the non-PIC case in the backend because there is no easy
16234 interface for the front-end to force non-PLT calls to use the GOT.
16235 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
16236 to call the function marked "noplt" indirectly. */
16239 ix86_nopic_noplt_attribute_p (rtx call_op
)
16241 if (flag_pic
|| ix86_cmodel
== CM_LARGE
16242 || !(TARGET_64BIT
|| HAVE_AS_IX86_GOT32X
)
16243 || TARGET_MACHO
|| TARGET_SEH
|| TARGET_PECOFF
16244 || SYMBOL_REF_LOCAL_P (call_op
))
16247 tree symbol_decl
= SYMBOL_REF_DECL (call_op
);
16250 || (symbol_decl
!= NULL_TREE
16251 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl
))))
16257 /* Helper to output the jmp/call. */
16259 ix86_output_jmp_thunk_or_indirect (const char *thunk_name
, const int regno
)
16261 if (thunk_name
!= NULL
)
16263 if (REX_INT_REGNO_P (regno
)
16264 && ix86_indirect_branch_cs_prefix
)
16265 fprintf (asm_out_file
, "\tcs\n");
16266 fprintf (asm_out_file
, "\tjmp\t");
16267 assemble_name (asm_out_file
, thunk_name
);
16268 putc ('\n', asm_out_file
);
16269 if ((ix86_harden_sls
& harden_sls_indirect_jmp
))
16270 fputs ("\tint3\n", asm_out_file
);
16273 output_indirect_thunk (regno
);
16276 /* Output indirect branch via a call and return thunk. CALL_OP is a
16277 register which contains the branch target. XASM is the assembly
16278 template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
16279 A normal call is converted to:
16281 call __x86_indirect_thunk_reg
16283 and a tail call is converted to:
16285 jmp __x86_indirect_thunk_reg
16289 ix86_output_indirect_branch_via_reg (rtx call_op
, bool sibcall_p
)
16291 char thunk_name_buf
[32];
16293 enum indirect_thunk_prefix need_prefix
16294 = indirect_thunk_need_prefix (current_output_insn
);
16295 int regno
= REGNO (call_op
);
16297 if (cfun
->machine
->indirect_branch_type
16298 != indirect_branch_thunk_inline
)
16300 if (cfun
->machine
->indirect_branch_type
== indirect_branch_thunk
)
16301 SET_HARD_REG_BIT (indirect_thunks_used
, regno
);
16303 indirect_thunk_name (thunk_name_buf
, regno
, need_prefix
, false);
16304 thunk_name
= thunk_name_buf
;
16310 ix86_output_jmp_thunk_or_indirect (thunk_name
, regno
);
16313 if (thunk_name
!= NULL
)
16315 if (REX_INT_REGNO_P (regno
)
16316 && ix86_indirect_branch_cs_prefix
)
16317 fprintf (asm_out_file
, "\tcs\n");
16318 fprintf (asm_out_file
, "\tcall\t");
16319 assemble_name (asm_out_file
, thunk_name
);
16320 putc ('\n', asm_out_file
);
16324 char indirectlabel1
[32];
16325 char indirectlabel2
[32];
16327 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1
,
16329 indirectlabelno
++);
16330 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2
,
16332 indirectlabelno
++);
16335 fputs ("\tjmp\t", asm_out_file
);
16336 assemble_name_raw (asm_out_file
, indirectlabel2
);
16337 fputc ('\n', asm_out_file
);
16339 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel1
);
16341 ix86_output_jmp_thunk_or_indirect (thunk_name
, regno
);
16343 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel2
);
16346 fputs ("\tcall\t", asm_out_file
);
16347 assemble_name_raw (asm_out_file
, indirectlabel1
);
16348 fputc ('\n', asm_out_file
);
16352 /* Output indirect branch via a call and return thunk. CALL_OP is
16353 the branch target. XASM is the assembly template for CALL_OP.
16354 Branch is a tail call if SIBCALL_P is true. A normal call is
16360 jmp __x86_indirect_thunk
16364 and a tail call is converted to:
16367 jmp __x86_indirect_thunk
16371 ix86_output_indirect_branch_via_push (rtx call_op
, const char *xasm
,
16374 char thunk_name_buf
[32];
16377 enum indirect_thunk_prefix need_prefix
16378 = indirect_thunk_need_prefix (current_output_insn
);
16381 if (cfun
->machine
->indirect_branch_type
16382 != indirect_branch_thunk_inline
)
16384 if (cfun
->machine
->indirect_branch_type
== indirect_branch_thunk
)
16385 indirect_thunk_needed
= true;
16386 indirect_thunk_name (thunk_name_buf
, regno
, need_prefix
, false);
16387 thunk_name
= thunk_name_buf
;
16392 snprintf (push_buf
, sizeof (push_buf
), "push{%c}\t%s",
16393 TARGET_64BIT
? 'q' : 'l', xasm
);
16397 output_asm_insn (push_buf
, &call_op
);
16398 ix86_output_jmp_thunk_or_indirect (thunk_name
, regno
);
16402 char indirectlabel1
[32];
16403 char indirectlabel2
[32];
16405 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1
,
16407 indirectlabelno
++);
16408 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2
,
16410 indirectlabelno
++);
16413 fputs ("\tjmp\t", asm_out_file
);
16414 assemble_name_raw (asm_out_file
, indirectlabel2
);
16415 fputc ('\n', asm_out_file
);
16417 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel1
);
16419 /* An external function may be called via GOT, instead of PLT. */
16420 if (MEM_P (call_op
))
16422 struct ix86_address parts
;
16423 rtx addr
= XEXP (call_op
, 0);
16424 if (ix86_decompose_address (addr
, &parts
)
16425 && parts
.base
== stack_pointer_rtx
)
16427 /* Since call will adjust stack by -UNITS_PER_WORD,
16428 we must convert "disp(stack, index, scale)" to
16429 "disp+UNITS_PER_WORD(stack, index, scale)". */
16432 addr
= gen_rtx_MULT (Pmode
, parts
.index
,
16433 GEN_INT (parts
.scale
));
16434 addr
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
16438 addr
= stack_pointer_rtx
;
16441 if (parts
.disp
!= NULL_RTX
)
16442 disp
= plus_constant (Pmode
, parts
.disp
,
16445 disp
= GEN_INT (UNITS_PER_WORD
);
16447 addr
= gen_rtx_PLUS (Pmode
, addr
, disp
);
16448 call_op
= gen_rtx_MEM (GET_MODE (call_op
), addr
);
16452 output_asm_insn (push_buf
, &call_op
);
16454 ix86_output_jmp_thunk_or_indirect (thunk_name
, regno
);
16456 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel2
);
16459 fputs ("\tcall\t", asm_out_file
);
16460 assemble_name_raw (asm_out_file
, indirectlabel1
);
16461 fputc ('\n', asm_out_file
);
16465 /* Output indirect branch via a call and return thunk. CALL_OP is
16466 the branch target. XASM is the assembly template for CALL_OP.
16467 Branch is a tail call if SIBCALL_P is true. */
16470 ix86_output_indirect_branch (rtx call_op
, const char *xasm
,
16473 if (REG_P (call_op
))
16474 ix86_output_indirect_branch_via_reg (call_op
, sibcall_p
);
16476 ix86_output_indirect_branch_via_push (call_op
, xasm
, sibcall_p
);
16479 /* Output indirect jump. CALL_OP is the jump target. */
16482 ix86_output_indirect_jmp (rtx call_op
)
16484 if (cfun
->machine
->indirect_branch_type
!= indirect_branch_keep
)
16486 /* We can't have red-zone since "call" in the indirect thunk
16487 pushes the return address onto stack, destroying red-zone. */
16488 if (ix86_red_zone_used
)
16489 gcc_unreachable ();
16491 ix86_output_indirect_branch (call_op
, "%0", true);
16494 output_asm_insn ("%!jmp\t%A0", &call_op
);
16495 return (ix86_harden_sls
& harden_sls_indirect_jmp
) ? "int3" : "";
16498 /* Output return instrumentation for current function if needed. */
16501 output_return_instrumentation (void)
16503 if (ix86_instrument_return
!= instrument_return_none
16505 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun
->decl
))
16507 if (ix86_flag_record_return
)
16508 fprintf (asm_out_file
, "1:\n");
16509 switch (ix86_instrument_return
)
16511 case instrument_return_call
:
16512 fprintf (asm_out_file
, "\tcall\t__return__\n");
16514 case instrument_return_nop5
:
16515 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
16516 fprintf (asm_out_file
, ASM_BYTE
"0x0f, 0x1f, 0x44, 0x00, 0x00\n");
16518 case instrument_return_none
:
16522 if (ix86_flag_record_return
)
16524 fprintf (asm_out_file
, "\t.section __return_loc, \"a\",@progbits\n");
16525 fprintf (asm_out_file
, "\t.%s 1b\n", TARGET_64BIT
? "quad" : "long");
16526 fprintf (asm_out_file
, "\t.previous\n");
16531 /* Output function return. CALL_OP is the jump target. Add a REP
16532 prefix to RET if LONG_P is true and function return is kept. */
16535 ix86_output_function_return (bool long_p
)
16537 output_return_instrumentation ();
16539 if (cfun
->machine
->function_return_type
!= indirect_branch_keep
)
16541 char thunk_name
[32];
16542 enum indirect_thunk_prefix need_prefix
16543 = indirect_thunk_need_prefix (current_output_insn
);
16545 if (cfun
->machine
->function_return_type
16546 != indirect_branch_thunk_inline
)
16548 bool need_thunk
= (cfun
->machine
->function_return_type
16549 == indirect_branch_thunk
);
16550 indirect_thunk_name (thunk_name
, INVALID_REGNUM
, need_prefix
,
16552 indirect_return_needed
|= need_thunk
;
16553 fprintf (asm_out_file
, "\tjmp\t");
16554 assemble_name (asm_out_file
, thunk_name
);
16555 putc ('\n', asm_out_file
);
16558 output_indirect_thunk (INVALID_REGNUM
);
16563 output_asm_insn (long_p
? "rep%; ret" : "ret", nullptr);
16564 return (ix86_harden_sls
& harden_sls_return
) ? "int3" : "";
16567 /* Output indirect function return. RET_OP is the function return
16571 ix86_output_indirect_function_return (rtx ret_op
)
16573 if (cfun
->machine
->function_return_type
!= indirect_branch_keep
)
16575 char thunk_name
[32];
16576 enum indirect_thunk_prefix need_prefix
16577 = indirect_thunk_need_prefix (current_output_insn
);
16578 unsigned int regno
= REGNO (ret_op
);
16579 gcc_assert (regno
== CX_REG
);
16581 if (cfun
->machine
->function_return_type
16582 != indirect_branch_thunk_inline
)
16584 bool need_thunk
= (cfun
->machine
->function_return_type
16585 == indirect_branch_thunk
);
16586 indirect_thunk_name (thunk_name
, regno
, need_prefix
, true);
16590 indirect_return_via_cx
= true;
16591 SET_HARD_REG_BIT (indirect_thunks_used
, CX_REG
);
16593 fprintf (asm_out_file
, "\tjmp\t");
16594 assemble_name (asm_out_file
, thunk_name
);
16595 putc ('\n', asm_out_file
);
16598 output_indirect_thunk (regno
);
16602 output_asm_insn ("%!jmp\t%A0", &ret_op
);
16603 if (ix86_harden_sls
& harden_sls_indirect_jmp
)
16604 fputs ("\tint3\n", asm_out_file
);
16609 /* Output the assembly for a call instruction. */
16612 ix86_output_call_insn (rtx_insn
*insn
, rtx call_op
)
16614 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
16615 bool output_indirect_p
16617 && cfun
->machine
->indirect_branch_type
!= indirect_branch_keep
);
16618 bool seh_nop_p
= false;
16621 if (SIBLING_CALL_P (insn
))
16623 output_return_instrumentation ();
16626 if (ix86_nopic_noplt_attribute_p (call_op
))
16631 if (output_indirect_p
)
16632 xasm
= "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16634 xasm
= "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16638 if (output_indirect_p
)
16639 xasm
= "{%p0@GOT|[DWORD PTR %p0@GOT]}";
16641 xasm
= "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
16645 xasm
= "%!jmp\t%P0";
16647 /* SEH epilogue detection requires the indirect branch case
16648 to include REX.W. */
16649 else if (TARGET_SEH
)
16650 xasm
= "%!rex.W jmp\t%A0";
16653 if (output_indirect_p
)
16656 xasm
= "%!jmp\t%A0";
16659 if (output_indirect_p
&& !direct_p
)
16660 ix86_output_indirect_branch (call_op
, xasm
, true);
16663 output_asm_insn (xasm
, &call_op
);
16665 && (ix86_harden_sls
& harden_sls_indirect_jmp
))
16671 /* SEH unwinding can require an extra nop to be emitted in several
16672 circumstances. Determine if we have one of those. */
16677 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
16679 /* Prevent a catch region from being adjacent to a jump that would
16680 be interpreted as an epilogue sequence by the unwinder. */
16681 if (JUMP_P(i
) && CROSSING_JUMP_P (i
))
16687 /* If we get to another real insn, we don't need the nop. */
16691 /* If we get to the epilogue note, prevent a catch region from
16692 being adjacent to the standard epilogue sequence. Note that,
16693 if non-call exceptions are enabled, we already did it during
16694 epilogue expansion, or else, if the insn can throw internally,
16695 we already did it during the reorg pass. */
16696 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
16697 && !flag_non_call_exceptions
16698 && !can_throw_internal (insn
))
16705 /* If we didn't find a real insn following the call, prevent the
16706 unwinder from looking into the next function. */
16713 if (ix86_nopic_noplt_attribute_p (call_op
))
16718 if (output_indirect_p
)
16719 xasm
= "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16721 xasm
= "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16725 if (output_indirect_p
)
16726 xasm
= "{%p0@GOT|[DWORD PTR %p0@GOT]}";
16728 xasm
= "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
16732 xasm
= "%!call\t%P0";
16736 if (output_indirect_p
)
16739 xasm
= "%!call\t%A0";
16742 if (output_indirect_p
&& !direct_p
)
16743 ix86_output_indirect_branch (call_op
, xasm
, false);
16745 output_asm_insn (xasm
, &call_op
);
16753 /* Return a MEM corresponding to a stack slot with mode MODE.
16754 Allocate a new slot if necessary.
16756 The RTL for a function can have several slots available: N is
16757 which slot to use. */
16760 assign_386_stack_local (machine_mode mode
, enum ix86_stack_slot n
)
16762 struct stack_local_entry
*s
;
16764 gcc_assert (n
< MAX_386_STACK_LOCALS
);
16766 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
16767 if (s
->mode
== mode
&& s
->n
== n
)
16768 return validize_mem (copy_rtx (s
->rtl
));
16771 /* For DImode with SLOT_FLOATxFDI_387 use 32-bit
16772 alignment with -m32 -mpreferred-stack-boundary=2. */
16775 && n
== SLOT_FLOATxFDI_387
16776 && ix86_preferred_stack_boundary
< GET_MODE_ALIGNMENT (DImode
))
16778 s
= ggc_alloc
<stack_local_entry
> ();
16781 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), align
);
16783 s
->next
= ix86_stack_locals
;
16784 ix86_stack_locals
= s
;
16785 return validize_mem (copy_rtx (s
->rtl
));
16789 ix86_instantiate_decls (void)
16791 struct stack_local_entry
*s
;
16793 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
16794 if (s
->rtl
!= NULL_RTX
)
16795 instantiate_decl_rtl (s
->rtl
);
16798 /* Check whether x86 address PARTS is a pc-relative address. */
16801 ix86_rip_relative_addr_p (struct ix86_address
*parts
)
16803 rtx base
, index
, disp
;
16805 base
= parts
->base
;
16806 index
= parts
->index
;
16807 disp
= parts
->disp
;
16809 if (disp
&& !base
&& !index
)
16815 if (GET_CODE (disp
) == CONST
)
16816 symbol
= XEXP (disp
, 0);
16817 if (GET_CODE (symbol
) == PLUS
16818 && CONST_INT_P (XEXP (symbol
, 1)))
16819 symbol
= XEXP (symbol
, 0);
16821 if (GET_CODE (symbol
) == LABEL_REF
16822 || (GET_CODE (symbol
) == SYMBOL_REF
16823 && SYMBOL_REF_TLS_MODEL (symbol
) == 0)
16824 || (GET_CODE (symbol
) == UNSPEC
16825 && (XINT (symbol
, 1) == UNSPEC_GOTPCREL
16826 || XINT (symbol
, 1) == UNSPEC_PCREL
16827 || XINT (symbol
, 1) == UNSPEC_GOTNTPOFF
)))
16834 /* Calculate the length of the memory address in the instruction encoding.
16835 Includes addr32 prefix, does not include the one-byte modrm, opcode,
16836 or other prefixes. We never generate addr32 prefix for LEA insn. */
16839 memory_address_length (rtx addr
, bool lea
)
16841 struct ix86_address parts
;
16842 rtx base
, index
, disp
;
16846 if (GET_CODE (addr
) == PRE_DEC
16847 || GET_CODE (addr
) == POST_INC
16848 || GET_CODE (addr
) == PRE_MODIFY
16849 || GET_CODE (addr
) == POST_MODIFY
)
16852 ok
= ix86_decompose_address (addr
, &parts
);
16855 len
= (parts
.seg
== ADDR_SPACE_GENERIC
) ? 0 : 1;
16857 /* If this is not LEA instruction, add the length of addr32 prefix. */
16858 if (TARGET_64BIT
&& !lea
16859 && (SImode_address_operand (addr
, VOIDmode
)
16860 || (parts
.base
&& GET_MODE (parts
.base
) == SImode
)
16861 || (parts
.index
&& GET_MODE (parts
.index
) == SImode
)))
16865 index
= parts
.index
;
16868 if (base
&& SUBREG_P (base
))
16869 base
= SUBREG_REG (base
);
16870 if (index
&& SUBREG_P (index
))
16871 index
= SUBREG_REG (index
);
16873 gcc_assert (base
== NULL_RTX
|| REG_P (base
));
16874 gcc_assert (index
== NULL_RTX
|| REG_P (index
));
16877 - esp as the base always wants an index,
16878 - ebp as the base always wants a displacement,
16879 - r12 as the base always wants an index,
16880 - r13 as the base always wants a displacement. */
16882 /* Register Indirect. */
16883 if (base
&& !index
&& !disp
)
16885 /* esp (for its index) and ebp (for its displacement) need
16886 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
16888 if (base
== arg_pointer_rtx
16889 || base
== frame_pointer_rtx
16890 || REGNO (base
) == SP_REG
16891 || REGNO (base
) == BP_REG
16892 || REGNO (base
) == R12_REG
16893 || REGNO (base
) == R13_REG
)
16897 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
16898 is not disp32, but disp32(%rip), so for disp32
16899 SIB byte is needed, unless print_operand_address
16900 optimizes it into disp32(%rip) or (%rip) is implied
16902 else if (disp
&& !base
&& !index
)
16905 if (!ix86_rip_relative_addr_p (&parts
))
16910 /* Find the length of the displacement constant. */
16913 if (base
&& satisfies_constraint_K (disp
))
16918 /* ebp always wants a displacement. Similarly r13. */
16919 else if (base
&& (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
16922 /* An index requires the two-byte modrm form.... */
16924 /* ...like esp (or r12), which always wants an index. */
16925 || base
== arg_pointer_rtx
16926 || base
== frame_pointer_rtx
16927 || (base
&& (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
16934 /* Compute default value for "length_immediate" attribute. When SHORTFORM
16935 is set, expect that insn have 8bit immediate alternative. */
16937 ix86_attr_length_immediate_default (rtx_insn
*insn
, bool shortform
)
16941 extract_insn_cached (insn
);
16942 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
16943 if (CONSTANT_P (recog_data
.operand
[i
]))
16945 enum attr_mode mode
= get_attr_mode (insn
);
16948 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
16950 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
16957 ival
= trunc_int_for_mode (ival
, HImode
);
16960 ival
= trunc_int_for_mode (ival
, SImode
);
16965 if (IN_RANGE (ival
, -128, 127))
16982 /* Immediates for DImode instructions are encoded
16983 as 32bit sign extended values. */
16988 fatal_insn ("unknown insn mode", insn
);
16994 /* Compute default value for "length_address" attribute. */
16996 ix86_attr_length_address_default (rtx_insn
*insn
)
17000 if (get_attr_type (insn
) == TYPE_LEA
)
17002 rtx set
= PATTERN (insn
), addr
;
17004 if (GET_CODE (set
) == PARALLEL
)
17005 set
= XVECEXP (set
, 0, 0);
17007 gcc_assert (GET_CODE (set
) == SET
);
17009 addr
= SET_SRC (set
);
17011 return memory_address_length (addr
, true);
17014 extract_insn_cached (insn
);
17015 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
17017 rtx op
= recog_data
.operand
[i
];
17020 constrain_operands_cached (insn
, reload_completed
);
17021 if (which_alternative
!= -1)
17023 const char *constraints
= recog_data
.constraints
[i
];
17024 int alt
= which_alternative
;
17026 while (*constraints
== '=' || *constraints
== '+')
17029 while (*constraints
++ != ',')
17031 /* Skip ignored operands. */
17032 if (*constraints
== 'X')
17036 int len
= memory_address_length (XEXP (op
, 0), false);
17038 /* Account for segment prefix for non-default addr spaces. */
17039 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op
)))
17048 /* Compute default value for "length_vex" attribute. It includes
17049 2 or 3 byte VEX prefix and 1 opcode byte. */
17052 ix86_attr_length_vex_default (rtx_insn
*insn
, bool has_0f_opcode
,
17055 int i
, reg_only
= 2 + 1;
17056 bool has_mem
= false;
17058 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
17059 byte VEX prefix. */
17060 if (!has_0f_opcode
|| has_vex_w
)
17063 /* We can always use 2 byte VEX prefix in 32bit. */
17067 extract_insn_cached (insn
);
17069 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
17070 if (REG_P (recog_data
.operand
[i
]))
17072 /* REX.W bit uses 3 byte VEX prefix. */
17073 if (GET_MODE (recog_data
.operand
[i
]) == DImode
17074 && GENERAL_REG_P (recog_data
.operand
[i
]))
17077 /* REX.B bit requires 3-byte VEX. Right here we don't know which
17078 operand will be encoded using VEX.B, so be conservative. */
17079 if (REX_INT_REGNO_P (recog_data
.operand
[i
])
17080 || REX_SSE_REGNO_P (recog_data
.operand
[i
]))
17083 else if (MEM_P (recog_data
.operand
[i
]))
17085 /* REX.X or REX.B bits use 3 byte VEX prefix. */
17086 if (x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
17092 return has_mem
? 2 + 1 : reg_only
;
17097 ix86_class_likely_spilled_p (reg_class_t
);
17099 /* Returns true if lhs of insn is HW function argument register and set up
17100 is_spilled to true if it is likely spilled HW register. */
17102 insn_is_function_arg (rtx insn
, bool* is_spilled
)
17106 if (!NONDEBUG_INSN_P (insn
))
17108 /* Call instructions are not movable, ignore it. */
17111 insn
= PATTERN (insn
);
17112 if (GET_CODE (insn
) == PARALLEL
)
17113 insn
= XVECEXP (insn
, 0, 0);
17114 if (GET_CODE (insn
) != SET
)
17116 dst
= SET_DEST (insn
);
17117 if (REG_P (dst
) && HARD_REGISTER_P (dst
)
17118 && ix86_function_arg_regno_p (REGNO (dst
)))
17120 /* Is it likely spilled HW register? */
17121 if (!TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (dst
))
17122 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst
))))
17123 *is_spilled
= true;
17129 /* Add output dependencies for chain of function adjacent arguments if only
17130 there is a move to likely spilled HW register. Return first argument
17131 if at least one dependence was added or NULL otherwise. */
17133 add_parameter_dependencies (rtx_insn
*call
, rtx_insn
*head
)
17136 rtx_insn
*last
= call
;
17137 rtx_insn
*first_arg
= NULL
;
17138 bool is_spilled
= false;
17140 head
= PREV_INSN (head
);
17142 /* Find nearest to call argument passing instruction. */
17145 last
= PREV_INSN (last
);
17148 if (!NONDEBUG_INSN_P (last
))
17150 if (insn_is_function_arg (last
, &is_spilled
))
17158 insn
= PREV_INSN (last
);
17159 if (!INSN_P (insn
))
17163 if (!NONDEBUG_INSN_P (insn
))
17168 if (insn_is_function_arg (insn
, &is_spilled
))
17170 /* Add output depdendence between two function arguments if chain
17171 of output arguments contains likely spilled HW registers. */
17173 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
17174 first_arg
= last
= insn
;
17184 /* Add output or anti dependency from insn to first_arg to restrict its code
17187 avoid_func_arg_motion (rtx_insn
*first_arg
, rtx_insn
*insn
)
17192 set
= single_set (insn
);
17195 tmp
= SET_DEST (set
);
17198 /* Add output dependency to the first function argument. */
17199 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
17202 /* Add anti dependency. */
17203 add_dependence (first_arg
, insn
, REG_DEP_ANTI
);
17206 /* Avoid cross block motion of function argument through adding dependency
17207 from the first non-jump instruction in bb. */
17209 add_dependee_for_func_arg (rtx_insn
*arg
, basic_block bb
)
17211 rtx_insn
*insn
= BB_END (bb
);
17215 if (NONDEBUG_INSN_P (insn
) && NONJUMP_INSN_P (insn
))
17217 rtx set
= single_set (insn
);
17220 avoid_func_arg_motion (arg
, insn
);
17224 if (insn
== BB_HEAD (bb
))
17226 insn
= PREV_INSN (insn
);
17230 /* Hook for pre-reload schedule - avoid motion of function arguments
17231 passed in likely spilled HW registers. */
17233 ix86_dependencies_evaluation_hook (rtx_insn
*head
, rtx_insn
*tail
)
17236 rtx_insn
*first_arg
= NULL
;
17237 if (reload_completed
)
17239 while (head
!= tail
&& DEBUG_INSN_P (head
))
17240 head
= NEXT_INSN (head
);
17241 for (insn
= tail
; insn
!= head
; insn
= PREV_INSN (insn
))
17242 if (INSN_P (insn
) && CALL_P (insn
))
17244 first_arg
= add_parameter_dependencies (insn
, head
);
17247 /* Add dependee for first argument to predecessors if only
17248 region contains more than one block. */
17249 basic_block bb
= BLOCK_FOR_INSN (insn
);
17250 int rgn
= CONTAINING_RGN (bb
->index
);
17251 int nr_blks
= RGN_NR_BLOCKS (rgn
);
17252 /* Skip trivial regions and region head blocks that can have
17253 predecessors outside of region. */
17254 if (nr_blks
> 1 && BLOCK_TO_BB (bb
->index
) != 0)
17259 /* Regions are SCCs with the exception of selective
17260 scheduling with pipelining of outer blocks enabled.
17261 So also check that immediate predecessors of a non-head
17262 block are in the same region. */
17263 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17265 /* Avoid creating of loop-carried dependencies through
17266 using topological ordering in the region. */
17267 if (rgn
== CONTAINING_RGN (e
->src
->index
)
17268 && BLOCK_TO_BB (bb
->index
) > BLOCK_TO_BB (e
->src
->index
))
17269 add_dependee_for_func_arg (first_arg
, e
->src
);
17277 else if (first_arg
)
17278 avoid_func_arg_motion (first_arg
, insn
);
17281 /* Hook for pre-reload schedule - set priority of moves from likely spilled
17282 HW registers to maximum, to schedule them at soon as possible. These are
17283 moves from function argument registers at the top of the function entry
17284 and moves from function return value registers after call. */
17286 ix86_adjust_priority (rtx_insn
*insn
, int priority
)
17290 if (reload_completed
)
17293 if (!NONDEBUG_INSN_P (insn
))
17296 set
= single_set (insn
);
17299 rtx tmp
= SET_SRC (set
);
17301 && HARD_REGISTER_P (tmp
)
17302 && !TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (tmp
))
17303 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp
))))
17304 return current_sched_info
->sched_max_insns_priority
;
17310 /* Prepare for scheduling pass. */
17312 ix86_sched_init_global (FILE *, int, int)
17314 /* Install scheduling hooks for current CPU. Some of these hooks are used
17315 in time-critical parts of the scheduler, so we only set them up when
17316 they are actually used. */
17319 case PROCESSOR_CORE2
:
17320 case PROCESSOR_NEHALEM
:
17321 case PROCESSOR_SANDYBRIDGE
:
17322 case PROCESSOR_HASWELL
:
17323 case PROCESSOR_TREMONT
:
17324 case PROCESSOR_ALDERLAKE
:
17325 case PROCESSOR_GENERIC
:
17326 /* Do not perform multipass scheduling for pre-reload schedule
17327 to save compile time. */
17328 if (reload_completed
)
17330 ix86_core2i7_init_hooks ();
17333 /* Fall through. */
17335 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
17336 targetm
.sched
.first_cycle_multipass_init
= NULL
;
17337 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
17338 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
17339 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
17340 targetm
.sched
.first_cycle_multipass_end
= NULL
;
17341 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
17347 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
17349 static HOST_WIDE_INT
17350 ix86_static_rtx_alignment (machine_mode mode
)
17352 if (mode
== DFmode
)
17354 if (ALIGN_MODE_128 (mode
))
17355 return MAX (128, GET_MODE_ALIGNMENT (mode
));
17356 return GET_MODE_ALIGNMENT (mode
);
17359 /* Implement TARGET_CONSTANT_ALIGNMENT. */
17361 static HOST_WIDE_INT
17362 ix86_constant_alignment (const_tree exp
, HOST_WIDE_INT align
)
17364 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
17365 || TREE_CODE (exp
) == INTEGER_CST
)
17367 machine_mode mode
= TYPE_MODE (TREE_TYPE (exp
));
17368 HOST_WIDE_INT mode_align
= ix86_static_rtx_alignment (mode
);
17369 return MAX (mode_align
, align
);
17371 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
17372 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
17373 return BITS_PER_WORD
;
17378 /* Implement TARGET_EMPTY_RECORD_P. */
17381 ix86_is_empty_record (const_tree type
)
17385 return default_is_empty_record (type
);
17388 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
17391 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v
, tree type
)
17393 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
17395 if (!cum
->warn_empty
)
17398 if (!TYPE_EMPTY_P (type
))
17401 /* Don't warn if the function isn't visible outside of the TU. */
17402 if (cum
->decl
&& !TREE_PUBLIC (cum
->decl
))
17405 const_tree ctx
= get_ultimate_context (cum
->decl
);
17406 if (ctx
!= NULL_TREE
17407 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx
))
17410 /* If the actual size of the type is zero, then there is no change
17411 in how objects of this size are passed. */
17412 if (int_size_in_bytes (type
) == 0)
17415 warning (OPT_Wabi
, "empty class %qT parameter passing ABI "
17416 "changes in %<-fabi-version=12%> (GCC 8)", type
);
17418 /* Only warn once. */
17419 cum
->warn_empty
= false;
17422 /* This hook returns name of multilib ABI. */
17424 static const char *
17425 ix86_get_multilib_abi_name (void)
17427 if (!(TARGET_64BIT_P (ix86_isa_flags
)))
17429 else if (TARGET_X32_P (ix86_isa_flags
))
17435 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
17436 the data type, and ALIGN is the alignment that the object would
17437 ordinarily have. */
17440 iamcu_alignment (tree type
, int align
)
17444 if (align
< 32 || TYPE_USER_ALIGN (type
))
17447 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
17449 type
= strip_array_types (type
);
17450 if (TYPE_ATOMIC (type
))
17453 mode
= TYPE_MODE (type
);
17454 switch (GET_MODE_CLASS (mode
))
17457 case MODE_COMPLEX_INT
:
17458 case MODE_COMPLEX_FLOAT
:
17460 case MODE_DECIMAL_FLOAT
:
17467 /* Compute the alignment for a static variable.
17468 TYPE is the data type, and ALIGN is the alignment that
17469 the object would ordinarily have. The value of this function is used
17470 instead of that alignment to align the object. */
17473 ix86_data_alignment (tree type
, unsigned int align
, bool opt
)
17475 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
17476 for symbols from other compilation units or symbols that don't need
17477 to bind locally. In order to preserve some ABI compatibility with
17478 those compilers, ensure we don't decrease alignment from what we
17481 unsigned int max_align_compat
= MIN (256, MAX_OFILE_ALIGNMENT
);
17483 /* A data structure, equal or greater than the size of a cache line
17484 (64 bytes in the Pentium 4 and other recent Intel processors, including
17485 processors based on Intel Core microarchitecture) should be aligned
17486 so that its base address is a multiple of a cache line size. */
17488 unsigned int max_align
17489 = MIN ((unsigned) ix86_tune_cost
->prefetch_block
* 8, MAX_OFILE_ALIGNMENT
);
17491 if (max_align
< BITS_PER_WORD
)
17492 max_align
= BITS_PER_WORD
;
17494 switch (ix86_align_data_type
)
17496 case ix86_align_data_type_abi
: opt
= false; break;
17497 case ix86_align_data_type_compat
: max_align
= BITS_PER_WORD
; break;
17498 case ix86_align_data_type_cacheline
: break;
17502 align
= iamcu_alignment (type
, align
);
17505 && AGGREGATE_TYPE_P (type
)
17506 && TYPE_SIZE (type
)
17507 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
)
17509 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type
)), max_align_compat
)
17510 && align
< max_align_compat
)
17511 align
= max_align_compat
;
17512 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type
)), max_align
)
17513 && align
< max_align
)
17517 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
17518 to 16byte boundary. */
17521 if ((opt
? AGGREGATE_TYPE_P (type
) : TREE_CODE (type
) == ARRAY_TYPE
)
17522 && TYPE_SIZE (type
)
17523 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
17524 && wi::geu_p (wi::to_wide (TYPE_SIZE (type
)), 128)
17532 if (TREE_CODE (type
) == ARRAY_TYPE
)
17534 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
17536 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
17539 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
17542 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
17544 if ((TYPE_MODE (type
) == XCmode
17545 || TYPE_MODE (type
) == TCmode
) && align
< 128)
17548 else if (RECORD_OR_UNION_TYPE_P (type
)
17549 && TYPE_FIELDS (type
))
17551 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
17553 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
17556 else if (SCALAR_FLOAT_TYPE_P (type
) || VECTOR_TYPE_P (type
)
17557 || TREE_CODE (type
) == INTEGER_TYPE
)
17559 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
17561 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
17568 /* Implememnt TARGET_LOWER_LOCAL_DECL_ALIGNMENT. */
17570 ix86_lower_local_decl_alignment (tree decl
)
17572 unsigned int new_align
= ix86_local_alignment (decl
, VOIDmode
,
17573 DECL_ALIGN (decl
), true);
17574 if (new_align
< DECL_ALIGN (decl
))
17575 SET_DECL_ALIGN (decl
, new_align
);
17578 /* Compute the alignment for a local variable or a stack slot. EXP is
17579 the data type or decl itself, MODE is the widest mode available and
17580 ALIGN is the alignment that the object would ordinarily have. The
17581 value of this macro is used instead of that alignment to align the
17585 ix86_local_alignment (tree exp
, machine_mode mode
,
17586 unsigned int align
, bool may_lower
)
17590 if (exp
&& DECL_P (exp
))
17592 type
= TREE_TYPE (exp
);
17601 /* Don't do dynamic stack realignment for long long objects with
17602 -mpreferred-stack-boundary=2. */
17606 && ix86_preferred_stack_boundary
< 64
17607 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
17608 && (!type
|| (!TYPE_USER_ALIGN (type
)
17609 && !TYPE_ATOMIC (strip_array_types (type
))))
17610 && (!decl
|| !DECL_USER_ALIGN (decl
)))
17613 /* If TYPE is NULL, we are allocating a stack slot for caller-save
17614 register in MODE. We will return the largest alignment of XF
17618 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
17619 align
= GET_MODE_ALIGNMENT (DFmode
);
17623 /* Don't increase alignment for Intel MCU psABI. */
17627 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
17628 to 16byte boundary. Exact wording is:
17630 An array uses the same alignment as its elements, except that a local or
17631 global array variable of length at least 16 bytes or
17632 a C99 variable-length array variable always has alignment of at least 16 bytes.
17634 This was added to allow use of aligned SSE instructions at arrays. This
17635 rule is meant for static storage (where compiler cannot do the analysis
17636 by itself). We follow it for automatic variables only when convenient.
17637 We fully control everything in the function compiled and functions from
17638 other unit cannot rely on the alignment.
17640 Exclude va_list type. It is the common case of local array where
17641 we cannot benefit from the alignment.
17643 TODO: Probably one should optimize for size only when var is not escaping. */
17644 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
17647 if (AGGREGATE_TYPE_P (type
)
17648 && (va_list_type_node
== NULL_TREE
17649 || (TYPE_MAIN_VARIANT (type
)
17650 != TYPE_MAIN_VARIANT (va_list_type_node
)))
17651 && TYPE_SIZE (type
)
17652 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
17653 && wi::geu_p (wi::to_wide (TYPE_SIZE (type
)), 128)
17657 if (TREE_CODE (type
) == ARRAY_TYPE
)
17659 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
17661 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
17664 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
17666 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
17668 if ((TYPE_MODE (type
) == XCmode
17669 || TYPE_MODE (type
) == TCmode
) && align
< 128)
17672 else if (RECORD_OR_UNION_TYPE_P (type
)
17673 && TYPE_FIELDS (type
))
17675 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
17677 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
17680 else if (SCALAR_FLOAT_TYPE_P (type
) || VECTOR_TYPE_P (type
)
17681 || TREE_CODE (type
) == INTEGER_TYPE
)
17684 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
17686 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
17692 /* Compute the minimum required alignment for dynamic stack realignment
17693 purposes for a local variable, parameter or a stack slot. EXP is
17694 the data type or decl itself, MODE is its mode and ALIGN is the
17695 alignment that the object would ordinarily have. */
17698 ix86_minimum_alignment (tree exp
, machine_mode mode
,
17699 unsigned int align
)
17703 if (exp
&& DECL_P (exp
))
17705 type
= TREE_TYPE (exp
);
17714 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
17717 /* Don't do dynamic stack realignment for long long objects with
17718 -mpreferred-stack-boundary=2. */
17719 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
17720 && (!type
|| (!TYPE_USER_ALIGN (type
)
17721 && !TYPE_ATOMIC (strip_array_types (type
))))
17722 && (!decl
|| !DECL_USER_ALIGN (decl
)))
17724 gcc_checking_assert (!TARGET_STV
);
17731 /* Find a location for the static chain incoming to a nested function.
17732 This is a register, unless all free registers are used by arguments. */
17735 ix86_static_chain (const_tree fndecl_or_type
, bool incoming_p
)
17741 /* We always use R10 in 64-bit mode. */
17746 const_tree fntype
, fndecl
;
17749 /* By default in 32-bit mode we use ECX to pass the static chain. */
17752 if (TREE_CODE (fndecl_or_type
) == FUNCTION_DECL
)
17754 fntype
= TREE_TYPE (fndecl_or_type
);
17755 fndecl
= fndecl_or_type
;
17759 fntype
= fndecl_or_type
;
17763 ccvt
= ix86_get_callcvt (fntype
);
17764 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
17766 /* Fastcall functions use ecx/edx for arguments, which leaves
17767 us with EAX for the static chain.
17768 Thiscall functions use ecx for arguments, which also
17769 leaves us with EAX for the static chain. */
17772 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
17774 /* Thiscall functions use ecx for arguments, which leaves
17775 us with EAX and EDX for the static chain.
17776 We are using for abi-compatibility EAX. */
17779 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
17781 /* For regparm 3, we have no free call-clobbered registers in
17782 which to store the static chain. In order to implement this,
17783 we have the trampoline push the static chain to the stack.
17784 However, we can't push a value below the return address when
17785 we call the nested function directly, so we have to use an
17786 alternate entry point. For this we use ESI, and have the
17787 alternate entry point push ESI, so that things appear the
17788 same once we're executing the nested function. */
17791 if (fndecl
== current_function_decl
17792 && !ix86_static_chain_on_stack
)
17794 gcc_assert (!reload_completed
);
17795 ix86_static_chain_on_stack
= true;
17797 return gen_frame_mem (SImode
,
17798 plus_constant (Pmode
,
17799 arg_pointer_rtx
, -8));
17805 return gen_rtx_REG (Pmode
, regno
);
17808 /* Emit RTL insns to initialize the variable parts of a trampoline.
17809 FNDECL is the decl of the target address; M_TRAMP is a MEM for
17810 the trampoline, and CHAIN_VALUE is an RTX for the static chain
17811 to be passed to the target function. */
17814 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
17819 bool need_endbr
= (flag_cf_protection
& CF_BRANCH
);
17821 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
17829 /* Insert ENDBR64. */
17830 mem
= adjust_address (m_tramp
, SImode
, offset
);
17831 emit_move_insn (mem
, gen_int_mode (0xfa1e0ff3, SImode
));
17835 /* Load the function address to r11. Try to load address using
17836 the shorter movl instead of movabs. We may want to support
17837 movq for kernel mode, but kernel does not use trampolines at
17838 the moment. FNADDR is a 32bit address and may not be in
17839 DImode when ptr_mode == SImode. Always use movl in this
17841 if (ptr_mode
== SImode
17842 || x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
17844 fnaddr
= copy_addr_to_reg (fnaddr
);
17846 mem
= adjust_address (m_tramp
, HImode
, offset
);
17847 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
17849 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
17850 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
17855 mem
= adjust_address (m_tramp
, HImode
, offset
);
17856 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
17858 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
17859 emit_move_insn (mem
, fnaddr
);
17863 /* Load static chain using movabs to r10. Use the shorter movl
17864 instead of movabs when ptr_mode == SImode. */
17865 if (ptr_mode
== SImode
)
17876 mem
= adjust_address (m_tramp
, HImode
, offset
);
17877 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
17879 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
17880 emit_move_insn (mem
, chain_value
);
17883 /* Jump to r11; the last (unused) byte is a nop, only there to
17884 pad the write out to a single 32-bit store. */
17885 mem
= adjust_address (m_tramp
, SImode
, offset
);
17886 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
17893 /* Depending on the static chain location, either load a register
17894 with a constant, or push the constant to the stack. All of the
17895 instructions are the same size. */
17896 chain
= ix86_static_chain (fndecl
, true);
17899 switch (REGNO (chain
))
17902 opcode
= 0xb8; break;
17904 opcode
= 0xb9; break;
17906 gcc_unreachable ();
17914 /* Insert ENDBR32. */
17915 mem
= adjust_address (m_tramp
, SImode
, offset
);
17916 emit_move_insn (mem
, gen_int_mode (0xfb1e0ff3, SImode
));
17920 mem
= adjust_address (m_tramp
, QImode
, offset
);
17921 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
17923 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
17924 emit_move_insn (mem
, chain_value
);
17927 mem
= adjust_address (m_tramp
, QImode
, offset
);
17928 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
17930 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
17932 /* Compute offset from the end of the jmp to the target function.
17933 In the case in which the trampoline stores the static chain on
17934 the stack, we need to skip the first insn which pushes the
17935 (call-saved) register static chain; this push is 1 byte. */
17937 int skip
= MEM_P (chain
) ? 1 : 0;
17938 /* Skip ENDBR32 at the entry of the target function. */
17940 && !cgraph_node::get (fndecl
)->only_called_directly_p ())
17942 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
17943 plus_constant (Pmode
, XEXP (m_tramp
, 0),
17945 NULL_RTX
, 1, OPTAB_DIRECT
);
17946 emit_move_insn (mem
, disp
);
17949 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
17951 #ifdef HAVE_ENABLE_EXECUTE_STACK
17952 #ifdef CHECK_EXECUTE_STACK_ENABLED
17953 if (CHECK_EXECUTE_STACK_ENABLED
)
17955 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
17956 LCT_NORMAL
, VOIDmode
, XEXP (m_tramp
, 0), Pmode
);
17961 ix86_allocate_stack_slots_for_args (void)
17963 /* Naked functions should not allocate stack slots for arguments. */
17964 return !ix86_function_naked (current_function_decl
);
17968 ix86_warn_func_return (tree decl
)
17970 /* Naked functions are implemented entirely in assembly, including the
17971 return sequence, so suppress warnings about this. */
17972 return !ix86_function_naked (decl
);
17975 /* Return the shift count of a vector by scalar shift builtin second argument
17978 ix86_vector_shift_count (tree arg1
)
17980 if (tree_fits_uhwi_p (arg1
))
17982 else if (TREE_CODE (arg1
) == VECTOR_CST
&& CHAR_BIT
== 8)
17984 /* The count argument is weird, passed in as various 128-bit
17985 (or 64-bit) vectors, the low 64 bits from it are the count. */
17986 unsigned char buf
[16];
17987 int len
= native_encode_expr (arg1
, buf
, 16);
17990 tree t
= native_interpret_expr (uint64_type_node
, buf
, len
);
17991 if (t
&& tree_fits_uhwi_p (t
))
17997 /* Return true if arg_mask is all ones, ELEMS is elements number of
17998 corresponding vector. */
18000 ix86_masked_all_ones (unsigned HOST_WIDE_INT elems
, tree arg_mask
)
18002 if (TREE_CODE (arg_mask
) != INTEGER_CST
)
18005 unsigned HOST_WIDE_INT mask
= TREE_INT_CST_LOW (arg_mask
);
18006 if (elems
== HOST_BITS_PER_WIDE_INT
)
18007 return mask
== HOST_WIDE_INT_M1U
;
18008 if ((mask
| (HOST_WIDE_INT_M1U
<< elems
)) != HOST_WIDE_INT_M1U
)
18015 ix86_fold_builtin (tree fndecl
, int n_args
,
18016 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
18018 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
18020 enum ix86_builtins fn_code
18021 = (enum ix86_builtins
) DECL_MD_FUNCTION_CODE (fndecl
);
18022 enum rtx_code rcode
;
18024 unsigned HOST_WIDE_INT mask
;
18028 case IX86_BUILTIN_CPU_IS
:
18029 case IX86_BUILTIN_CPU_SUPPORTS
:
18030 gcc_assert (n_args
== 1);
18031 return fold_builtin_cpu (fndecl
, args
);
18033 case IX86_BUILTIN_NANQ
:
18034 case IX86_BUILTIN_NANSQ
:
18036 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
18037 const char *str
= c_getstr (*args
);
18038 int quiet
= fn_code
== IX86_BUILTIN_NANQ
;
18039 REAL_VALUE_TYPE real
;
18041 if (str
&& real_nan (&real
, str
, quiet
, TYPE_MODE (type
)))
18042 return build_real (type
, real
);
18046 case IX86_BUILTIN_INFQ
:
18047 case IX86_BUILTIN_HUGE_VALQ
:
18049 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
18050 REAL_VALUE_TYPE inf
;
18052 return build_real (type
, inf
);
18055 case IX86_BUILTIN_TZCNT16
:
18056 case IX86_BUILTIN_CTZS
:
18057 case IX86_BUILTIN_TZCNT32
:
18058 case IX86_BUILTIN_TZCNT64
:
18059 gcc_assert (n_args
== 1);
18060 if (TREE_CODE (args
[0]) == INTEGER_CST
)
18062 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
18063 tree arg
= args
[0];
18064 if (fn_code
== IX86_BUILTIN_TZCNT16
18065 || fn_code
== IX86_BUILTIN_CTZS
)
18066 arg
= fold_convert (short_unsigned_type_node
, arg
);
18067 if (integer_zerop (arg
))
18068 return build_int_cst (type
, TYPE_PRECISION (TREE_TYPE (arg
)));
18070 return fold_const_call (CFN_CTZ
, type
, arg
);
18074 case IX86_BUILTIN_LZCNT16
:
18075 case IX86_BUILTIN_CLZS
:
18076 case IX86_BUILTIN_LZCNT32
:
18077 case IX86_BUILTIN_LZCNT64
:
18078 gcc_assert (n_args
== 1);
18079 if (TREE_CODE (args
[0]) == INTEGER_CST
)
18081 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
18082 tree arg
= args
[0];
18083 if (fn_code
== IX86_BUILTIN_LZCNT16
18084 || fn_code
== IX86_BUILTIN_CLZS
)
18085 arg
= fold_convert (short_unsigned_type_node
, arg
);
18086 if (integer_zerop (arg
))
18087 return build_int_cst (type
, TYPE_PRECISION (TREE_TYPE (arg
)));
18089 return fold_const_call (CFN_CLZ
, type
, arg
);
18093 case IX86_BUILTIN_BEXTR32
:
18094 case IX86_BUILTIN_BEXTR64
:
18095 case IX86_BUILTIN_BEXTRI32
:
18096 case IX86_BUILTIN_BEXTRI64
:
18097 gcc_assert (n_args
== 2);
18098 if (tree_fits_uhwi_p (args
[1]))
18100 unsigned HOST_WIDE_INT res
= 0;
18101 unsigned int prec
= TYPE_PRECISION (TREE_TYPE (args
[0]));
18102 unsigned int start
= tree_to_uhwi (args
[1]);
18103 unsigned int len
= (start
& 0xff00) >> 8;
18105 if (start
>= prec
|| len
== 0)
18107 else if (!tree_fits_uhwi_p (args
[0]))
18110 res
= tree_to_uhwi (args
[0]) >> start
;
18113 if (len
< HOST_BITS_PER_WIDE_INT
)
18114 res
&= (HOST_WIDE_INT_1U
<< len
) - 1;
18115 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl
)), res
);
18119 case IX86_BUILTIN_BZHI32
:
18120 case IX86_BUILTIN_BZHI64
:
18121 gcc_assert (n_args
== 2);
18122 if (tree_fits_uhwi_p (args
[1]))
18124 unsigned int idx
= tree_to_uhwi (args
[1]) & 0xff;
18125 if (idx
>= TYPE_PRECISION (TREE_TYPE (args
[0])))
18128 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl
)), 0);
18129 if (!tree_fits_uhwi_p (args
[0]))
18131 unsigned HOST_WIDE_INT res
= tree_to_uhwi (args
[0]);
18132 res
&= ~(HOST_WIDE_INT_M1U
<< idx
);
18133 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl
)), res
);
18137 case IX86_BUILTIN_PDEP32
:
18138 case IX86_BUILTIN_PDEP64
:
18139 gcc_assert (n_args
== 2);
18140 if (tree_fits_uhwi_p (args
[0]) && tree_fits_uhwi_p (args
[1]))
18142 unsigned HOST_WIDE_INT src
= tree_to_uhwi (args
[0]);
18143 unsigned HOST_WIDE_INT mask
= tree_to_uhwi (args
[1]);
18144 unsigned HOST_WIDE_INT res
= 0;
18145 unsigned HOST_WIDE_INT m
, k
= 1;
18146 for (m
= 1; m
; m
<<= 1)
18147 if ((mask
& m
) != 0)
18149 if ((src
& k
) != 0)
18153 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl
)), res
);
18157 case IX86_BUILTIN_PEXT32
:
18158 case IX86_BUILTIN_PEXT64
:
18159 gcc_assert (n_args
== 2);
18160 if (tree_fits_uhwi_p (args
[0]) && tree_fits_uhwi_p (args
[1]))
18162 unsigned HOST_WIDE_INT src
= tree_to_uhwi (args
[0]);
18163 unsigned HOST_WIDE_INT mask
= tree_to_uhwi (args
[1]);
18164 unsigned HOST_WIDE_INT res
= 0;
18165 unsigned HOST_WIDE_INT m
, k
= 1;
18166 for (m
= 1; m
; m
<<= 1)
18167 if ((mask
& m
) != 0)
18169 if ((src
& m
) != 0)
18173 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl
)), res
);
18177 case IX86_BUILTIN_MOVMSKPS
:
18178 case IX86_BUILTIN_PMOVMSKB
:
18179 case IX86_BUILTIN_MOVMSKPD
:
18180 case IX86_BUILTIN_PMOVMSKB128
:
18181 case IX86_BUILTIN_MOVMSKPD256
:
18182 case IX86_BUILTIN_MOVMSKPS256
:
18183 case IX86_BUILTIN_PMOVMSKB256
:
18184 gcc_assert (n_args
== 1);
18185 if (TREE_CODE (args
[0]) == VECTOR_CST
)
18187 HOST_WIDE_INT res
= 0;
18188 for (unsigned i
= 0; i
< VECTOR_CST_NELTS (args
[0]); ++i
)
18190 tree e
= VECTOR_CST_ELT (args
[0], i
);
18191 if (TREE_CODE (e
) == INTEGER_CST
&& !TREE_OVERFLOW (e
))
18193 if (wi::neg_p (wi::to_wide (e
)))
18194 res
|= HOST_WIDE_INT_1
<< i
;
18196 else if (TREE_CODE (e
) == REAL_CST
&& !TREE_OVERFLOW (e
))
18198 if (TREE_REAL_CST (e
).sign
)
18199 res
|= HOST_WIDE_INT_1
<< i
;
18204 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl
)), res
);
18208 case IX86_BUILTIN_PSLLD
:
18209 case IX86_BUILTIN_PSLLD128
:
18210 case IX86_BUILTIN_PSLLD128_MASK
:
18211 case IX86_BUILTIN_PSLLD256
:
18212 case IX86_BUILTIN_PSLLD256_MASK
:
18213 case IX86_BUILTIN_PSLLD512
:
18214 case IX86_BUILTIN_PSLLDI
:
18215 case IX86_BUILTIN_PSLLDI128
:
18216 case IX86_BUILTIN_PSLLDI128_MASK
:
18217 case IX86_BUILTIN_PSLLDI256
:
18218 case IX86_BUILTIN_PSLLDI256_MASK
:
18219 case IX86_BUILTIN_PSLLDI512
:
18220 case IX86_BUILTIN_PSLLQ
:
18221 case IX86_BUILTIN_PSLLQ128
:
18222 case IX86_BUILTIN_PSLLQ128_MASK
:
18223 case IX86_BUILTIN_PSLLQ256
:
18224 case IX86_BUILTIN_PSLLQ256_MASK
:
18225 case IX86_BUILTIN_PSLLQ512
:
18226 case IX86_BUILTIN_PSLLQI
:
18227 case IX86_BUILTIN_PSLLQI128
:
18228 case IX86_BUILTIN_PSLLQI128_MASK
:
18229 case IX86_BUILTIN_PSLLQI256
:
18230 case IX86_BUILTIN_PSLLQI256_MASK
:
18231 case IX86_BUILTIN_PSLLQI512
:
18232 case IX86_BUILTIN_PSLLW
:
18233 case IX86_BUILTIN_PSLLW128
:
18234 case IX86_BUILTIN_PSLLW128_MASK
:
18235 case IX86_BUILTIN_PSLLW256
:
18236 case IX86_BUILTIN_PSLLW256_MASK
:
18237 case IX86_BUILTIN_PSLLW512_MASK
:
18238 case IX86_BUILTIN_PSLLWI
:
18239 case IX86_BUILTIN_PSLLWI128
:
18240 case IX86_BUILTIN_PSLLWI128_MASK
:
18241 case IX86_BUILTIN_PSLLWI256
:
18242 case IX86_BUILTIN_PSLLWI256_MASK
:
18243 case IX86_BUILTIN_PSLLWI512_MASK
:
18247 case IX86_BUILTIN_PSRAD
:
18248 case IX86_BUILTIN_PSRAD128
:
18249 case IX86_BUILTIN_PSRAD128_MASK
:
18250 case IX86_BUILTIN_PSRAD256
:
18251 case IX86_BUILTIN_PSRAD256_MASK
:
18252 case IX86_BUILTIN_PSRAD512
:
18253 case IX86_BUILTIN_PSRADI
:
18254 case IX86_BUILTIN_PSRADI128
:
18255 case IX86_BUILTIN_PSRADI128_MASK
:
18256 case IX86_BUILTIN_PSRADI256
:
18257 case IX86_BUILTIN_PSRADI256_MASK
:
18258 case IX86_BUILTIN_PSRADI512
:
18259 case IX86_BUILTIN_PSRAQ128_MASK
:
18260 case IX86_BUILTIN_PSRAQ256_MASK
:
18261 case IX86_BUILTIN_PSRAQ512
:
18262 case IX86_BUILTIN_PSRAQI128_MASK
:
18263 case IX86_BUILTIN_PSRAQI256_MASK
:
18264 case IX86_BUILTIN_PSRAQI512
:
18265 case IX86_BUILTIN_PSRAW
:
18266 case IX86_BUILTIN_PSRAW128
:
18267 case IX86_BUILTIN_PSRAW128_MASK
:
18268 case IX86_BUILTIN_PSRAW256
:
18269 case IX86_BUILTIN_PSRAW256_MASK
:
18270 case IX86_BUILTIN_PSRAW512
:
18271 case IX86_BUILTIN_PSRAWI
:
18272 case IX86_BUILTIN_PSRAWI128
:
18273 case IX86_BUILTIN_PSRAWI128_MASK
:
18274 case IX86_BUILTIN_PSRAWI256
:
18275 case IX86_BUILTIN_PSRAWI256_MASK
:
18276 case IX86_BUILTIN_PSRAWI512
:
18280 case IX86_BUILTIN_PSRLD
:
18281 case IX86_BUILTIN_PSRLD128
:
18282 case IX86_BUILTIN_PSRLD128_MASK
:
18283 case IX86_BUILTIN_PSRLD256
:
18284 case IX86_BUILTIN_PSRLD256_MASK
:
18285 case IX86_BUILTIN_PSRLD512
:
18286 case IX86_BUILTIN_PSRLDI
:
18287 case IX86_BUILTIN_PSRLDI128
:
18288 case IX86_BUILTIN_PSRLDI128_MASK
:
18289 case IX86_BUILTIN_PSRLDI256
:
18290 case IX86_BUILTIN_PSRLDI256_MASK
:
18291 case IX86_BUILTIN_PSRLDI512
:
18292 case IX86_BUILTIN_PSRLQ
:
18293 case IX86_BUILTIN_PSRLQ128
:
18294 case IX86_BUILTIN_PSRLQ128_MASK
:
18295 case IX86_BUILTIN_PSRLQ256
:
18296 case IX86_BUILTIN_PSRLQ256_MASK
:
18297 case IX86_BUILTIN_PSRLQ512
:
18298 case IX86_BUILTIN_PSRLQI
:
18299 case IX86_BUILTIN_PSRLQI128
:
18300 case IX86_BUILTIN_PSRLQI128_MASK
:
18301 case IX86_BUILTIN_PSRLQI256
:
18302 case IX86_BUILTIN_PSRLQI256_MASK
:
18303 case IX86_BUILTIN_PSRLQI512
:
18304 case IX86_BUILTIN_PSRLW
:
18305 case IX86_BUILTIN_PSRLW128
:
18306 case IX86_BUILTIN_PSRLW128_MASK
:
18307 case IX86_BUILTIN_PSRLW256
:
18308 case IX86_BUILTIN_PSRLW256_MASK
:
18309 case IX86_BUILTIN_PSRLW512
:
18310 case IX86_BUILTIN_PSRLWI
:
18311 case IX86_BUILTIN_PSRLWI128
:
18312 case IX86_BUILTIN_PSRLWI128_MASK
:
18313 case IX86_BUILTIN_PSRLWI256
:
18314 case IX86_BUILTIN_PSRLWI256_MASK
:
18315 case IX86_BUILTIN_PSRLWI512
:
18319 case IX86_BUILTIN_PSLLVV16HI
:
18320 case IX86_BUILTIN_PSLLVV16SI
:
18321 case IX86_BUILTIN_PSLLVV2DI
:
18322 case IX86_BUILTIN_PSLLVV2DI_MASK
:
18323 case IX86_BUILTIN_PSLLVV32HI
:
18324 case IX86_BUILTIN_PSLLVV4DI
:
18325 case IX86_BUILTIN_PSLLVV4DI_MASK
:
18326 case IX86_BUILTIN_PSLLVV4SI
:
18327 case IX86_BUILTIN_PSLLVV4SI_MASK
:
18328 case IX86_BUILTIN_PSLLVV8DI
:
18329 case IX86_BUILTIN_PSLLVV8HI
:
18330 case IX86_BUILTIN_PSLLVV8SI
:
18331 case IX86_BUILTIN_PSLLVV8SI_MASK
:
18335 case IX86_BUILTIN_PSRAVQ128
:
18336 case IX86_BUILTIN_PSRAVQ256
:
18337 case IX86_BUILTIN_PSRAVV16HI
:
18338 case IX86_BUILTIN_PSRAVV16SI
:
18339 case IX86_BUILTIN_PSRAVV32HI
:
18340 case IX86_BUILTIN_PSRAVV4SI
:
18341 case IX86_BUILTIN_PSRAVV4SI_MASK
:
18342 case IX86_BUILTIN_PSRAVV8DI
:
18343 case IX86_BUILTIN_PSRAVV8HI
:
18344 case IX86_BUILTIN_PSRAVV8SI
:
18345 case IX86_BUILTIN_PSRAVV8SI_MASK
:
18349 case IX86_BUILTIN_PSRLVV16HI
:
18350 case IX86_BUILTIN_PSRLVV16SI
:
18351 case IX86_BUILTIN_PSRLVV2DI
:
18352 case IX86_BUILTIN_PSRLVV2DI_MASK
:
18353 case IX86_BUILTIN_PSRLVV32HI
:
18354 case IX86_BUILTIN_PSRLVV4DI
:
18355 case IX86_BUILTIN_PSRLVV4DI_MASK
:
18356 case IX86_BUILTIN_PSRLVV4SI
:
18357 case IX86_BUILTIN_PSRLVV4SI_MASK
:
18358 case IX86_BUILTIN_PSRLVV8DI
:
18359 case IX86_BUILTIN_PSRLVV8HI
:
18360 case IX86_BUILTIN_PSRLVV8SI
:
18361 case IX86_BUILTIN_PSRLVV8SI_MASK
:
18367 gcc_assert (n_args
>= 2);
18368 if (TREE_CODE (args
[0]) != VECTOR_CST
)
18370 mask
= HOST_WIDE_INT_M1U
;
18373 /* This is masked shift. */
18374 if (!tree_fits_uhwi_p (args
[n_args
- 1])
18375 || TREE_SIDE_EFFECTS (args
[n_args
- 2]))
18377 mask
= tree_to_uhwi (args
[n_args
- 1]);
18378 unsigned elems
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (args
[0]));
18379 mask
|= HOST_WIDE_INT_M1U
<< elems
;
18380 if (mask
!= HOST_WIDE_INT_M1U
18381 && TREE_CODE (args
[n_args
- 2]) != VECTOR_CST
)
18383 if (mask
== (HOST_WIDE_INT_M1U
<< elems
))
18384 return args
[n_args
- 2];
18386 if (is_vshift
&& TREE_CODE (args
[1]) != VECTOR_CST
)
18388 if (tree tem
= (is_vshift
? integer_one_node
18389 : ix86_vector_shift_count (args
[1])))
18391 unsigned HOST_WIDE_INT count
= tree_to_uhwi (tem
);
18392 unsigned HOST_WIDE_INT prec
18393 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args
[0])));
18394 if (count
== 0 && mask
== HOST_WIDE_INT_M1U
)
18398 if (rcode
== ASHIFTRT
)
18400 else if (mask
== HOST_WIDE_INT_M1U
)
18401 return build_zero_cst (TREE_TYPE (args
[0]));
18403 tree countt
= NULL_TREE
;
18407 countt
= integer_zero_node
;
18409 countt
= build_int_cst (integer_type_node
, count
);
18411 tree_vector_builder builder
;
18412 if (mask
!= HOST_WIDE_INT_M1U
|| is_vshift
)
18413 builder
.new_vector (TREE_TYPE (args
[0]),
18414 TYPE_VECTOR_SUBPARTS (TREE_TYPE (args
[0])),
18417 builder
.new_unary_operation (TREE_TYPE (args
[0]), args
[0],
18419 unsigned int cnt
= builder
.encoded_nelts ();
18420 for (unsigned int i
= 0; i
< cnt
; ++i
)
18422 tree elt
= VECTOR_CST_ELT (args
[0], i
);
18423 if (TREE_CODE (elt
) != INTEGER_CST
|| TREE_OVERFLOW (elt
))
18425 tree type
= TREE_TYPE (elt
);
18426 if (rcode
== LSHIFTRT
)
18427 elt
= fold_convert (unsigned_type_for (type
), elt
);
18430 countt
= VECTOR_CST_ELT (args
[1], i
);
18431 if (TREE_CODE (countt
) != INTEGER_CST
18432 || TREE_OVERFLOW (countt
))
18434 if (wi::neg_p (wi::to_wide (countt
))
18435 || wi::to_widest (countt
) >= prec
)
18437 if (rcode
== ASHIFTRT
)
18438 countt
= build_int_cst (TREE_TYPE (countt
),
18442 elt
= build_zero_cst (TREE_TYPE (elt
));
18443 countt
= build_zero_cst (TREE_TYPE (countt
));
18447 else if (count
>= prec
)
18448 elt
= build_zero_cst (TREE_TYPE (elt
));
18449 elt
= const_binop (rcode
== ASHIFT
18450 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
18451 TREE_TYPE (elt
), elt
, countt
);
18452 if (!elt
|| TREE_CODE (elt
) != INTEGER_CST
)
18454 if (rcode
== LSHIFTRT
)
18455 elt
= fold_convert (type
, elt
);
18456 if ((mask
& (HOST_WIDE_INT_1U
<< i
)) == 0)
18458 elt
= VECTOR_CST_ELT (args
[n_args
- 2], i
);
18459 if (TREE_CODE (elt
) != INTEGER_CST
18460 || TREE_OVERFLOW (elt
))
18463 builder
.quick_push (elt
);
18465 return builder
.build ();
18474 #ifdef SUBTARGET_FOLD_BUILTIN
18475 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
18481 /* Fold a MD builtin (use ix86_fold_builtin for folding into
18482 constant) in GIMPLE. */
18485 ix86_gimple_fold_builtin (gimple_stmt_iterator
*gsi
)
18487 gimple
*stmt
= gsi_stmt (*gsi
), *g
;
18488 gimple_seq stmts
= NULL
;
18489 tree fndecl
= gimple_call_fndecl (stmt
);
18490 gcc_checking_assert (fndecl
&& fndecl_built_in_p (fndecl
, BUILT_IN_MD
));
18491 int n_args
= gimple_call_num_args (stmt
);
18492 enum ix86_builtins fn_code
18493 = (enum ix86_builtins
) DECL_MD_FUNCTION_CODE (fndecl
);
18494 tree decl
= NULL_TREE
;
18495 tree arg0
, arg1
, arg2
;
18496 enum rtx_code rcode
;
18497 enum tree_code tcode
;
18498 unsigned HOST_WIDE_INT count
;
18500 unsigned HOST_WIDE_INT elems
;
18503 /* Don't fold when there's isa mismatch. */
18504 if (!ix86_check_builtin_isa_match (fn_code
, NULL
, NULL
))
18509 case IX86_BUILTIN_TZCNT32
:
18510 decl
= builtin_decl_implicit (BUILT_IN_CTZ
);
18511 goto fold_tzcnt_lzcnt
;
18513 case IX86_BUILTIN_TZCNT64
:
18514 decl
= builtin_decl_implicit (BUILT_IN_CTZLL
);
18515 goto fold_tzcnt_lzcnt
;
18517 case IX86_BUILTIN_LZCNT32
:
18518 decl
= builtin_decl_implicit (BUILT_IN_CLZ
);
18519 goto fold_tzcnt_lzcnt
;
18521 case IX86_BUILTIN_LZCNT64
:
18522 decl
= builtin_decl_implicit (BUILT_IN_CLZLL
);
18523 goto fold_tzcnt_lzcnt
;
18526 gcc_assert (n_args
== 1);
18527 arg0
= gimple_call_arg (stmt
, 0);
18528 if (TREE_CODE (arg0
) == SSA_NAME
&& decl
&& gimple_call_lhs (stmt
))
18530 int prec
= TYPE_PRECISION (TREE_TYPE (arg0
));
18531 /* If arg0 is provably non-zero, optimize into generic
18532 __builtin_c[tl]z{,ll} function the middle-end handles
18534 if (!expr_not_equal_to (arg0
, wi::zero (prec
)))
18537 loc
= gimple_location (stmt
);
18538 g
= gimple_build_call (decl
, 1, arg0
);
18539 gimple_set_location (g
, loc
);
18540 tree lhs
= make_ssa_name (integer_type_node
);
18541 gimple_call_set_lhs (g
, lhs
);
18542 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
18543 g
= gimple_build_assign (gimple_call_lhs (stmt
), NOP_EXPR
, lhs
);
18544 gimple_set_location (g
, loc
);
18545 gsi_replace (gsi
, g
, false);
18550 case IX86_BUILTIN_BZHI32
:
18551 case IX86_BUILTIN_BZHI64
:
18552 gcc_assert (n_args
== 2);
18553 arg1
= gimple_call_arg (stmt
, 1);
18554 if (tree_fits_uhwi_p (arg1
) && gimple_call_lhs (stmt
))
18556 unsigned int idx
= tree_to_uhwi (arg1
) & 0xff;
18557 arg0
= gimple_call_arg (stmt
, 0);
18558 if (idx
< TYPE_PRECISION (TREE_TYPE (arg0
)))
18560 loc
= gimple_location (stmt
);
18561 g
= gimple_build_assign (gimple_call_lhs (stmt
), arg0
);
18562 gimple_set_location (g
, loc
);
18563 gsi_replace (gsi
, g
, false);
18568 case IX86_BUILTIN_PDEP32
:
18569 case IX86_BUILTIN_PDEP64
:
18570 case IX86_BUILTIN_PEXT32
:
18571 case IX86_BUILTIN_PEXT64
:
18572 gcc_assert (n_args
== 2);
18573 arg1
= gimple_call_arg (stmt
, 1);
18574 if (integer_all_onesp (arg1
) && gimple_call_lhs (stmt
))
18576 loc
= gimple_location (stmt
);
18577 arg0
= gimple_call_arg (stmt
, 0);
18578 g
= gimple_build_assign (gimple_call_lhs (stmt
), arg0
);
18579 gimple_set_location (g
, loc
);
18580 gsi_replace (gsi
, g
, false);
18585 case IX86_BUILTIN_PBLENDVB256
:
18586 case IX86_BUILTIN_BLENDVPS256
:
18587 case IX86_BUILTIN_BLENDVPD256
:
18588 /* pcmpeqb/d/q is under avx2, w/o avx2, it's veclower
18589 to scalar operations and not combined back. */
18594 case IX86_BUILTIN_BLENDVPD
:
18595 /* blendvpd is under sse4.1 but pcmpgtq is under sse4.2,
18596 w/o sse4.2, it's veclowered to scalar operations and
18597 not combined back. */
18598 if (!TARGET_SSE4_2
)
18601 case IX86_BUILTIN_PBLENDVB128
:
18602 case IX86_BUILTIN_BLENDVPS
:
18603 gcc_assert (n_args
== 3);
18604 arg0
= gimple_call_arg (stmt
, 0);
18605 arg1
= gimple_call_arg (stmt
, 1);
18606 arg2
= gimple_call_arg (stmt
, 2);
18607 if (gimple_call_lhs (stmt
))
18609 loc
= gimple_location (stmt
);
18610 tree type
= TREE_TYPE (arg2
);
18611 if (VECTOR_FLOAT_TYPE_P (type
))
18613 tree itype
= GET_MODE_INNER (TYPE_MODE (type
)) == E_SFmode
18614 ? intSI_type_node
: intDI_type_node
;
18615 type
= get_same_sized_vectype (itype
, type
);
18618 type
= signed_type_for (type
);
18619 arg2
= gimple_build (&stmts
, VIEW_CONVERT_EXPR
, type
, arg2
);
18620 tree zero_vec
= build_zero_cst (type
);
18621 tree cmp_type
= truth_type_for (type
);
18622 tree cmp
= gimple_build (&stmts
, LT_EXPR
, cmp_type
, arg2
, zero_vec
);
18623 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
18624 g
= gimple_build_assign (gimple_call_lhs (stmt
),
18625 VEC_COND_EXPR
, cmp
,
18627 gimple_set_location (g
, loc
);
18628 gsi_replace (gsi
, g
, false);
18631 gsi_replace (gsi
, gimple_build_nop (), false);
18635 case IX86_BUILTIN_PCMPEQB128
:
18636 case IX86_BUILTIN_PCMPEQW128
:
18637 case IX86_BUILTIN_PCMPEQD128
:
18638 case IX86_BUILTIN_PCMPEQQ
:
18639 case IX86_BUILTIN_PCMPEQB256
:
18640 case IX86_BUILTIN_PCMPEQW256
:
18641 case IX86_BUILTIN_PCMPEQD256
:
18642 case IX86_BUILTIN_PCMPEQQ256
:
18646 case IX86_BUILTIN_PCMPGTB128
:
18647 case IX86_BUILTIN_PCMPGTW128
:
18648 case IX86_BUILTIN_PCMPGTD128
:
18649 case IX86_BUILTIN_PCMPGTQ
:
18650 case IX86_BUILTIN_PCMPGTB256
:
18651 case IX86_BUILTIN_PCMPGTW256
:
18652 case IX86_BUILTIN_PCMPGTD256
:
18653 case IX86_BUILTIN_PCMPGTQ256
:
18657 gcc_assert (n_args
== 2);
18658 arg0
= gimple_call_arg (stmt
, 0);
18659 arg1
= gimple_call_arg (stmt
, 1);
18660 if (gimple_call_lhs (stmt
))
18662 loc
= gimple_location (stmt
);
18663 tree type
= TREE_TYPE (arg0
);
18664 tree zero_vec
= build_zero_cst (type
);
18665 tree minus_one_vec
= build_minus_one_cst (type
);
18666 tree cmp_type
= truth_type_for (type
);
18667 tree cmp
= gimple_build (&stmts
, tcode
, cmp_type
, arg0
, arg1
);
18668 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
18669 g
= gimple_build_assign (gimple_call_lhs (stmt
),
18670 VEC_COND_EXPR
, cmp
,
18671 minus_one_vec
, zero_vec
);
18672 gimple_set_location (g
, loc
);
18673 gsi_replace (gsi
, g
, false);
18676 gsi_replace (gsi
, gimple_build_nop (), false);
18679 case IX86_BUILTIN_PSLLD
:
18680 case IX86_BUILTIN_PSLLD128
:
18681 case IX86_BUILTIN_PSLLD128_MASK
:
18682 case IX86_BUILTIN_PSLLD256
:
18683 case IX86_BUILTIN_PSLLD256_MASK
:
18684 case IX86_BUILTIN_PSLLD512
:
18685 case IX86_BUILTIN_PSLLDI
:
18686 case IX86_BUILTIN_PSLLDI128
:
18687 case IX86_BUILTIN_PSLLDI128_MASK
:
18688 case IX86_BUILTIN_PSLLDI256
:
18689 case IX86_BUILTIN_PSLLDI256_MASK
:
18690 case IX86_BUILTIN_PSLLDI512
:
18691 case IX86_BUILTIN_PSLLQ
:
18692 case IX86_BUILTIN_PSLLQ128
:
18693 case IX86_BUILTIN_PSLLQ128_MASK
:
18694 case IX86_BUILTIN_PSLLQ256
:
18695 case IX86_BUILTIN_PSLLQ256_MASK
:
18696 case IX86_BUILTIN_PSLLQ512
:
18697 case IX86_BUILTIN_PSLLQI
:
18698 case IX86_BUILTIN_PSLLQI128
:
18699 case IX86_BUILTIN_PSLLQI128_MASK
:
18700 case IX86_BUILTIN_PSLLQI256
:
18701 case IX86_BUILTIN_PSLLQI256_MASK
:
18702 case IX86_BUILTIN_PSLLQI512
:
18703 case IX86_BUILTIN_PSLLW
:
18704 case IX86_BUILTIN_PSLLW128
:
18705 case IX86_BUILTIN_PSLLW128_MASK
:
18706 case IX86_BUILTIN_PSLLW256
:
18707 case IX86_BUILTIN_PSLLW256_MASK
:
18708 case IX86_BUILTIN_PSLLW512_MASK
:
18709 case IX86_BUILTIN_PSLLWI
:
18710 case IX86_BUILTIN_PSLLWI128
:
18711 case IX86_BUILTIN_PSLLWI128_MASK
:
18712 case IX86_BUILTIN_PSLLWI256
:
18713 case IX86_BUILTIN_PSLLWI256_MASK
:
18714 case IX86_BUILTIN_PSLLWI512_MASK
:
18718 case IX86_BUILTIN_PSRAD
:
18719 case IX86_BUILTIN_PSRAD128
:
18720 case IX86_BUILTIN_PSRAD128_MASK
:
18721 case IX86_BUILTIN_PSRAD256
:
18722 case IX86_BUILTIN_PSRAD256_MASK
:
18723 case IX86_BUILTIN_PSRAD512
:
18724 case IX86_BUILTIN_PSRADI
:
18725 case IX86_BUILTIN_PSRADI128
:
18726 case IX86_BUILTIN_PSRADI128_MASK
:
18727 case IX86_BUILTIN_PSRADI256
:
18728 case IX86_BUILTIN_PSRADI256_MASK
:
18729 case IX86_BUILTIN_PSRADI512
:
18730 case IX86_BUILTIN_PSRAQ128_MASK
:
18731 case IX86_BUILTIN_PSRAQ256_MASK
:
18732 case IX86_BUILTIN_PSRAQ512
:
18733 case IX86_BUILTIN_PSRAQI128_MASK
:
18734 case IX86_BUILTIN_PSRAQI256_MASK
:
18735 case IX86_BUILTIN_PSRAQI512
:
18736 case IX86_BUILTIN_PSRAW
:
18737 case IX86_BUILTIN_PSRAW128
:
18738 case IX86_BUILTIN_PSRAW128_MASK
:
18739 case IX86_BUILTIN_PSRAW256
:
18740 case IX86_BUILTIN_PSRAW256_MASK
:
18741 case IX86_BUILTIN_PSRAW512
:
18742 case IX86_BUILTIN_PSRAWI
:
18743 case IX86_BUILTIN_PSRAWI128
:
18744 case IX86_BUILTIN_PSRAWI128_MASK
:
18745 case IX86_BUILTIN_PSRAWI256
:
18746 case IX86_BUILTIN_PSRAWI256_MASK
:
18747 case IX86_BUILTIN_PSRAWI512
:
18751 case IX86_BUILTIN_PSRLD
:
18752 case IX86_BUILTIN_PSRLD128
:
18753 case IX86_BUILTIN_PSRLD128_MASK
:
18754 case IX86_BUILTIN_PSRLD256
:
18755 case IX86_BUILTIN_PSRLD256_MASK
:
18756 case IX86_BUILTIN_PSRLD512
:
18757 case IX86_BUILTIN_PSRLDI
:
18758 case IX86_BUILTIN_PSRLDI128
:
18759 case IX86_BUILTIN_PSRLDI128_MASK
:
18760 case IX86_BUILTIN_PSRLDI256
:
18761 case IX86_BUILTIN_PSRLDI256_MASK
:
18762 case IX86_BUILTIN_PSRLDI512
:
18763 case IX86_BUILTIN_PSRLQ
:
18764 case IX86_BUILTIN_PSRLQ128
:
18765 case IX86_BUILTIN_PSRLQ128_MASK
:
18766 case IX86_BUILTIN_PSRLQ256
:
18767 case IX86_BUILTIN_PSRLQ256_MASK
:
18768 case IX86_BUILTIN_PSRLQ512
:
18769 case IX86_BUILTIN_PSRLQI
:
18770 case IX86_BUILTIN_PSRLQI128
:
18771 case IX86_BUILTIN_PSRLQI128_MASK
:
18772 case IX86_BUILTIN_PSRLQI256
:
18773 case IX86_BUILTIN_PSRLQI256_MASK
:
18774 case IX86_BUILTIN_PSRLQI512
:
18775 case IX86_BUILTIN_PSRLW
:
18776 case IX86_BUILTIN_PSRLW128
:
18777 case IX86_BUILTIN_PSRLW128_MASK
:
18778 case IX86_BUILTIN_PSRLW256
:
18779 case IX86_BUILTIN_PSRLW256_MASK
:
18780 case IX86_BUILTIN_PSRLW512
:
18781 case IX86_BUILTIN_PSRLWI
:
18782 case IX86_BUILTIN_PSRLWI128
:
18783 case IX86_BUILTIN_PSRLWI128_MASK
:
18784 case IX86_BUILTIN_PSRLWI256
:
18785 case IX86_BUILTIN_PSRLWI256_MASK
:
18786 case IX86_BUILTIN_PSRLWI512
:
18790 case IX86_BUILTIN_PSLLVV16HI
:
18791 case IX86_BUILTIN_PSLLVV16SI
:
18792 case IX86_BUILTIN_PSLLVV2DI
:
18793 case IX86_BUILTIN_PSLLVV2DI_MASK
:
18794 case IX86_BUILTIN_PSLLVV32HI
:
18795 case IX86_BUILTIN_PSLLVV4DI
:
18796 case IX86_BUILTIN_PSLLVV4DI_MASK
:
18797 case IX86_BUILTIN_PSLLVV4SI
:
18798 case IX86_BUILTIN_PSLLVV4SI_MASK
:
18799 case IX86_BUILTIN_PSLLVV8DI
:
18800 case IX86_BUILTIN_PSLLVV8HI
:
18801 case IX86_BUILTIN_PSLLVV8SI
:
18802 case IX86_BUILTIN_PSLLVV8SI_MASK
:
18806 case IX86_BUILTIN_PSRAVQ128
:
18807 case IX86_BUILTIN_PSRAVQ256
:
18808 case IX86_BUILTIN_PSRAVV16HI
:
18809 case IX86_BUILTIN_PSRAVV16SI
:
18810 case IX86_BUILTIN_PSRAVV32HI
:
18811 case IX86_BUILTIN_PSRAVV4SI
:
18812 case IX86_BUILTIN_PSRAVV4SI_MASK
:
18813 case IX86_BUILTIN_PSRAVV8DI
:
18814 case IX86_BUILTIN_PSRAVV8HI
:
18815 case IX86_BUILTIN_PSRAVV8SI
:
18816 case IX86_BUILTIN_PSRAVV8SI_MASK
:
18820 case IX86_BUILTIN_PSRLVV16HI
:
18821 case IX86_BUILTIN_PSRLVV16SI
:
18822 case IX86_BUILTIN_PSRLVV2DI
:
18823 case IX86_BUILTIN_PSRLVV2DI_MASK
:
18824 case IX86_BUILTIN_PSRLVV32HI
:
18825 case IX86_BUILTIN_PSRLVV4DI
:
18826 case IX86_BUILTIN_PSRLVV4DI_MASK
:
18827 case IX86_BUILTIN_PSRLVV4SI
:
18828 case IX86_BUILTIN_PSRLVV4SI_MASK
:
18829 case IX86_BUILTIN_PSRLVV8DI
:
18830 case IX86_BUILTIN_PSRLVV8HI
:
18831 case IX86_BUILTIN_PSRLVV8SI
:
18832 case IX86_BUILTIN_PSRLVV8SI_MASK
:
18838 gcc_assert (n_args
>= 2);
18839 if (!gimple_call_lhs (stmt
))
18841 arg0
= gimple_call_arg (stmt
, 0);
18842 arg1
= gimple_call_arg (stmt
, 1);
18843 elems
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0
));
18844 /* For masked shift, only optimize if the mask is all ones. */
18846 && !ix86_masked_all_ones (elems
, gimple_call_arg (stmt
, n_args
- 1)))
18850 if (TREE_CODE (arg1
) != VECTOR_CST
)
18852 count
= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0
)));
18853 if (integer_zerop (arg1
))
18855 else if (rcode
== ASHIFTRT
)
18858 for (unsigned int i
= 0; i
< VECTOR_CST_NELTS (arg1
); ++i
)
18860 tree elt
= VECTOR_CST_ELT (arg1
, i
);
18861 if (!wi::neg_p (wi::to_wide (elt
))
18862 && wi::to_widest (elt
) < count
)
18868 arg1
= ix86_vector_shift_count (arg1
);
18871 count
= tree_to_uhwi (arg1
);
18875 /* Just return the first argument for shift by 0. */
18876 loc
= gimple_location (stmt
);
18877 g
= gimple_build_assign (gimple_call_lhs (stmt
), arg0
);
18878 gimple_set_location (g
, loc
);
18879 gsi_replace (gsi
, g
, false);
18882 if (rcode
!= ASHIFTRT
18883 && count
>= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0
))))
18885 /* For shift counts equal or greater than precision, except for
18886 arithmetic right shift the result is zero. */
18887 loc
= gimple_location (stmt
);
18888 g
= gimple_build_assign (gimple_call_lhs (stmt
),
18889 build_zero_cst (TREE_TYPE (arg0
)));
18890 gimple_set_location (g
, loc
);
18891 gsi_replace (gsi
, g
, false);
18896 case IX86_BUILTIN_SHUFPD512
:
18897 case IX86_BUILTIN_SHUFPS512
:
18898 case IX86_BUILTIN_SHUFPD
:
18899 case IX86_BUILTIN_SHUFPD256
:
18900 case IX86_BUILTIN_SHUFPS
:
18901 case IX86_BUILTIN_SHUFPS256
:
18902 arg0
= gimple_call_arg (stmt
, 0);
18903 elems
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0
));
18904 /* This is masked shuffle. Only optimize if the mask is all ones. */
18906 && !ix86_masked_all_ones (elems
,
18907 gimple_call_arg (stmt
, n_args
- 1)))
18909 arg2
= gimple_call_arg (stmt
, 2);
18910 if (TREE_CODE (arg2
) == INTEGER_CST
&& gimple_call_lhs (stmt
))
18912 unsigned HOST_WIDE_INT shuffle_mask
= TREE_INT_CST_LOW (arg2
);
18913 /* Check valid imm, refer to gcc.target/i386/testimm-10.c. */
18914 if (shuffle_mask
> 255)
18917 machine_mode imode
= GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0
)));
18918 loc
= gimple_location (stmt
);
18919 tree itype
= (imode
== E_DFmode
18920 ? long_long_integer_type_node
: integer_type_node
);
18921 tree vtype
= build_vector_type (itype
, elems
);
18922 tree_vector_builder
elts (vtype
, elems
, 1);
18925 /* Transform integer shuffle_mask to vector perm_mask which
18926 is used by vec_perm_expr, refer to shuflp[sd]256/512 in sse.md. */
18927 for (unsigned i
= 0; i
!= elems
; i
++)
18930 /* Imm[1:0](if VL > 128, then use Imm[3:2],Imm[5:4],Imm[7:6])
18931 provide 2 select constrols for each element of the
18933 if (imode
== E_DFmode
)
18934 sel_idx
= (i
& 1) * elems
+ (i
& ~1)
18935 + ((shuffle_mask
>> i
) & 1);
18938 /* Imm[7:0](if VL > 128, also use Imm[7:0]) provide 4 select
18939 controls for each element of the destination. */
18940 unsigned j
= i
% 4;
18941 sel_idx
= ((i
>> 1) & 1) * elems
+ (i
& ~3)
18942 + ((shuffle_mask
>> 2 * j
) & 3);
18944 elts
.quick_push (build_int_cst (itype
, sel_idx
));
18947 tree perm_mask
= elts
.build ();
18948 arg1
= gimple_call_arg (stmt
, 1);
18949 g
= gimple_build_assign (gimple_call_lhs (stmt
),
18951 arg0
, arg1
, perm_mask
);
18952 gimple_set_location (g
, loc
);
18953 gsi_replace (gsi
, g
, false);
18956 // Do not error yet, the constant could be propagated later?
18959 case IX86_BUILTIN_PABSB
:
18960 case IX86_BUILTIN_PABSW
:
18961 case IX86_BUILTIN_PABSD
:
18962 /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */
18963 if (!TARGET_MMX_WITH_SSE
)
18966 case IX86_BUILTIN_PABSB128
:
18967 case IX86_BUILTIN_PABSB256
:
18968 case IX86_BUILTIN_PABSB512
:
18969 case IX86_BUILTIN_PABSW128
:
18970 case IX86_BUILTIN_PABSW256
:
18971 case IX86_BUILTIN_PABSW512
:
18972 case IX86_BUILTIN_PABSD128
:
18973 case IX86_BUILTIN_PABSD256
:
18974 case IX86_BUILTIN_PABSD512
:
18975 case IX86_BUILTIN_PABSQ128
:
18976 case IX86_BUILTIN_PABSQ256
:
18977 case IX86_BUILTIN_PABSQ512
:
18978 case IX86_BUILTIN_PABSB128_MASK
:
18979 case IX86_BUILTIN_PABSB256_MASK
:
18980 case IX86_BUILTIN_PABSW128_MASK
:
18981 case IX86_BUILTIN_PABSW256_MASK
:
18982 case IX86_BUILTIN_PABSD128_MASK
:
18983 case IX86_BUILTIN_PABSD256_MASK
:
18984 gcc_assert (n_args
>= 1);
18985 if (!gimple_call_lhs (stmt
))
18987 arg0
= gimple_call_arg (stmt
, 0);
18988 elems
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0
));
18989 /* For masked ABS, only optimize if the mask is all ones. */
18991 && !ix86_masked_all_ones (elems
, gimple_call_arg (stmt
, n_args
- 1)))
18994 tree utype
, ures
, vce
;
18995 utype
= unsigned_type_for (TREE_TYPE (arg0
));
18996 /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
18997 instead of ABS_EXPR to hanlde overflow case(TYPE_MIN). */
18998 ures
= gimple_build (&stmts
, ABSU_EXPR
, utype
, arg0
);
18999 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
19000 loc
= gimple_location (stmt
);
19001 vce
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (arg0
), ures
);
19002 g
= gimple_build_assign (gimple_call_lhs (stmt
),
19003 VIEW_CONVERT_EXPR
, vce
);
19004 gsi_replace (gsi
, g
, false);
19015 /* Handler for an SVML-style interface to
19016 a library with vectorized intrinsics. */
19019 ix86_veclibabi_svml (combined_fn fn
, tree type_out
, tree type_in
)
19022 tree fntype
, new_fndecl
, args
;
19025 machine_mode el_mode
, in_mode
;
19028 /* The SVML is suitable for unsafe math only. */
19029 if (!flag_unsafe_math_optimizations
)
19032 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
19033 n
= TYPE_VECTOR_SUBPARTS (type_out
);
19034 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
19035 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
19036 if (el_mode
!= in_mode
19060 if ((el_mode
!= DFmode
|| n
!= 2)
19061 && (el_mode
!= SFmode
|| n
!= 4))
19069 tree fndecl
= mathfn_built_in (el_mode
== DFmode
19070 ? double_type_node
: float_type_node
, fn
);
19071 bname
= IDENTIFIER_POINTER (DECL_NAME (fndecl
));
19073 if (DECL_FUNCTION_CODE (fndecl
) == BUILT_IN_LOGF
)
19074 strcpy (name
, "vmlsLn4");
19075 else if (DECL_FUNCTION_CODE (fndecl
) == BUILT_IN_LOG
)
19076 strcpy (name
, "vmldLn2");
19079 sprintf (name
, "vmls%s", bname
+10);
19080 name
[strlen (name
)-1] = '4';
19083 sprintf (name
, "vmld%s2", bname
+10);
19085 /* Convert to uppercase. */
19089 for (args
= DECL_ARGUMENTS (fndecl
); args
; args
= TREE_CHAIN (args
))
19093 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
19095 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
19097 /* Build a function declaration for the vectorized function. */
19098 new_fndecl
= build_decl (BUILTINS_LOCATION
,
19099 FUNCTION_DECL
, get_identifier (name
), fntype
);
19100 TREE_PUBLIC (new_fndecl
) = 1;
19101 DECL_EXTERNAL (new_fndecl
) = 1;
19102 DECL_IS_NOVOPS (new_fndecl
) = 1;
19103 TREE_READONLY (new_fndecl
) = 1;
19108 /* Handler for an ACML-style interface to
19109 a library with vectorized intrinsics. */
19112 ix86_veclibabi_acml (combined_fn fn
, tree type_out
, tree type_in
)
19114 char name
[20] = "__vr.._";
19115 tree fntype
, new_fndecl
, args
;
19118 machine_mode el_mode
, in_mode
;
19121 /* The ACML is 64bits only and suitable for unsafe math only as
19122 it does not correctly support parts of IEEE with the required
19123 precision such as denormals. */
19125 || !flag_unsafe_math_optimizations
)
19128 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
19129 n
= TYPE_VECTOR_SUBPARTS (type_out
);
19130 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
19131 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
19132 if (el_mode
!= in_mode
19144 if (el_mode
== DFmode
&& n
== 2)
19149 else if (el_mode
== SFmode
&& n
== 4)
19162 tree fndecl
= mathfn_built_in (el_mode
== DFmode
19163 ? double_type_node
: float_type_node
, fn
);
19164 bname
= IDENTIFIER_POINTER (DECL_NAME (fndecl
));
19165 sprintf (name
+ 7, "%s", bname
+10);
19168 for (args
= DECL_ARGUMENTS (fndecl
); args
; args
= TREE_CHAIN (args
))
19172 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
19174 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
19176 /* Build a function declaration for the vectorized function. */
19177 new_fndecl
= build_decl (BUILTINS_LOCATION
,
19178 FUNCTION_DECL
, get_identifier (name
), fntype
);
19179 TREE_PUBLIC (new_fndecl
) = 1;
19180 DECL_EXTERNAL (new_fndecl
) = 1;
19181 DECL_IS_NOVOPS (new_fndecl
) = 1;
19182 TREE_READONLY (new_fndecl
) = 1;
19187 /* Returns a decl of a function that implements scatter store with
19188 register type VECTYPE and index type INDEX_TYPE and SCALE.
19189 Return NULL_TREE if it is not available. */
19192 ix86_vectorize_builtin_scatter (const_tree vectype
,
19193 const_tree index_type
, int scale
)
19196 enum ix86_builtins code
;
19198 if (!TARGET_AVX512F
)
19201 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 2u)
19202 ? !TARGET_USE_SCATTER_2PARTS
19203 : (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 4u)
19204 ? !TARGET_USE_SCATTER_4PARTS
19205 : !TARGET_USE_SCATTER_8PARTS
))
19208 if ((TREE_CODE (index_type
) != INTEGER_TYPE
19209 && !POINTER_TYPE_P (index_type
))
19210 || (TYPE_MODE (index_type
) != SImode
19211 && TYPE_MODE (index_type
) != DImode
))
19214 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
19217 /* v*scatter* insn sign extends index to pointer mode. */
19218 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
19219 && TYPE_UNSIGNED (index_type
))
19222 /* Scale can be 1, 2, 4 or 8. */
19225 || (scale
& (scale
- 1)) != 0)
19228 si
= TYPE_MODE (index_type
) == SImode
;
19229 switch (TYPE_MODE (vectype
))
19232 code
= si
? IX86_BUILTIN_SCATTERALTSIV8DF
: IX86_BUILTIN_SCATTERDIV8DF
;
19235 code
= si
? IX86_BUILTIN_SCATTERALTSIV8DI
: IX86_BUILTIN_SCATTERDIV8DI
;
19238 code
= si
? IX86_BUILTIN_SCATTERSIV16SF
: IX86_BUILTIN_SCATTERALTDIV16SF
;
19241 code
= si
? IX86_BUILTIN_SCATTERSIV16SI
: IX86_BUILTIN_SCATTERALTDIV16SI
;
19244 if (TARGET_AVX512VL
)
19245 code
= si
? IX86_BUILTIN_SCATTERALTSIV4DF
: IX86_BUILTIN_SCATTERDIV4DF
;
19250 if (TARGET_AVX512VL
)
19251 code
= si
? IX86_BUILTIN_SCATTERALTSIV4DI
: IX86_BUILTIN_SCATTERDIV4DI
;
19256 if (TARGET_AVX512VL
)
19257 code
= si
? IX86_BUILTIN_SCATTERSIV8SF
: IX86_BUILTIN_SCATTERALTDIV8SF
;
19262 if (TARGET_AVX512VL
)
19263 code
= si
? IX86_BUILTIN_SCATTERSIV8SI
: IX86_BUILTIN_SCATTERALTDIV8SI
;
19268 if (TARGET_AVX512VL
)
19269 code
= si
? IX86_BUILTIN_SCATTERALTSIV2DF
: IX86_BUILTIN_SCATTERDIV2DF
;
19274 if (TARGET_AVX512VL
)
19275 code
= si
? IX86_BUILTIN_SCATTERALTSIV2DI
: IX86_BUILTIN_SCATTERDIV2DI
;
19280 if (TARGET_AVX512VL
)
19281 code
= si
? IX86_BUILTIN_SCATTERSIV4SF
: IX86_BUILTIN_SCATTERALTDIV4SF
;
19286 if (TARGET_AVX512VL
)
19287 code
= si
? IX86_BUILTIN_SCATTERSIV4SI
: IX86_BUILTIN_SCATTERALTDIV4SI
;
19295 return get_ix86_builtin (code
);
19298 /* Return true if it is safe to use the rsqrt optabs to optimize
19302 use_rsqrt_p (machine_mode mode
)
19304 return ((mode
== HFmode
19305 || (TARGET_SSE
&& TARGET_SSE_MATH
))
19306 && flag_finite_math_only
19307 && !flag_trapping_math
19308 && flag_unsafe_math_optimizations
);
19311 /* Helper for avx_vpermilps256_operand et al. This is also used by
19312 the expansion functions to turn the parallel back into a mask.
19313 The return value is 0 for no match and the imm8+1 for a match. */
19316 avx_vpermilp_parallel (rtx par
, machine_mode mode
)
19318 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
19320 unsigned char ipar
[16] = {}; /* Silence -Wuninitialized warning. */
19322 if (XVECLEN (par
, 0) != (int) nelt
)
19325 /* Validate that all of the elements are constants, and not totally
19326 out of range. Copy the data into an integral array to make the
19327 subsequent checks easier. */
19328 for (i
= 0; i
< nelt
; ++i
)
19330 rtx er
= XVECEXP (par
, 0, i
);
19331 unsigned HOST_WIDE_INT ei
;
19333 if (!CONST_INT_P (er
))
19344 /* In the 512-bit DFmode case, we can only move elements within
19345 a 128-bit lane. First fill the second part of the mask,
19347 for (i
= 4; i
< 6; ++i
)
19349 if (ipar
[i
] < 4 || ipar
[i
] >= 6)
19351 mask
|= (ipar
[i
] - 4) << i
;
19353 for (i
= 6; i
< 8; ++i
)
19357 mask
|= (ipar
[i
] - 6) << i
;
19362 /* In the 256-bit DFmode case, we can only move elements within
19364 for (i
= 0; i
< 2; ++i
)
19368 mask
|= ipar
[i
] << i
;
19370 for (i
= 2; i
< 4; ++i
)
19374 mask
|= (ipar
[i
] - 2) << i
;
19379 /* In 512 bit SFmode case, permutation in the upper 256 bits
19380 must mirror the permutation in the lower 256-bits. */
19381 for (i
= 0; i
< 8; ++i
)
19382 if (ipar
[i
] + 8 != ipar
[i
+ 8])
19387 /* In 256 bit SFmode case, we have full freedom of
19388 movement within the low 128-bit lane, but the high 128-bit
19389 lane must mirror the exact same pattern. */
19390 for (i
= 0; i
< 4; ++i
)
19391 if (ipar
[i
] + 4 != ipar
[i
+ 4])
19398 /* In the 128-bit case, we've full freedom in the placement of
19399 the elements from the source operand. */
19400 for (i
= 0; i
< nelt
; ++i
)
19401 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
19405 gcc_unreachable ();
19408 /* Make sure success has a non-zero value by adding one. */
19412 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
19413 the expansion functions to turn the parallel back into a mask.
19414 The return value is 0 for no match and the imm8+1 for a match. */
19417 avx_vperm2f128_parallel (rtx par
, machine_mode mode
)
19419 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
19421 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
19423 if (XVECLEN (par
, 0) != (int) nelt
)
19426 /* Validate that all of the elements are constants, and not totally
19427 out of range. Copy the data into an integral array to make the
19428 subsequent checks easier. */
19429 for (i
= 0; i
< nelt
; ++i
)
19431 rtx er
= XVECEXP (par
, 0, i
);
19432 unsigned HOST_WIDE_INT ei
;
19434 if (!CONST_INT_P (er
))
19437 if (ei
>= 2 * nelt
)
19442 /* Validate that the halves of the permute are halves. */
19443 for (i
= 0; i
< nelt2
- 1; ++i
)
19444 if (ipar
[i
] + 1 != ipar
[i
+ 1])
19446 for (i
= nelt2
; i
< nelt
- 1; ++i
)
19447 if (ipar
[i
] + 1 != ipar
[i
+ 1])
19450 /* Reconstruct the mask. */
19451 for (i
= 0; i
< 2; ++i
)
19453 unsigned e
= ipar
[i
* nelt2
];
19457 mask
|= e
<< (i
* 4);
19460 /* Make sure success has a non-zero value by adding one. */
19464 /* Return a mask of VPTERNLOG operands that do not affect output. */
19467 vpternlog_redundant_operand_mask (rtx pternlog_imm
)
19470 int imm8
= INTVAL (pternlog_imm
);
19472 if (((imm8
>> 4) & 0x0F) == (imm8
& 0x0F))
19474 if (((imm8
>> 2) & 0x33) == (imm8
& 0x33))
19476 if (((imm8
>> 1) & 0x55) == (imm8
& 0x55))
19482 /* Eliminate false dependencies on operands that do not affect output
19483 by substituting other operands of a VPTERNLOG. */
19486 substitute_vpternlog_operands (rtx
*operands
)
19488 int mask
= vpternlog_redundant_operand_mask (operands
[4]);
19490 if (mask
& 1) /* The first operand is redundant. */
19491 operands
[1] = operands
[2];
19493 if (mask
& 2) /* The second operand is redundant. */
19494 operands
[2] = operands
[1];
19496 if (mask
& 4) /* The third operand is redundant. */
19497 operands
[3] = operands
[1];
19498 else if (REG_P (operands
[3]))
19501 operands
[1] = operands
[3];
19503 operands
[2] = operands
[3];
19507 /* Return a register priority for hard reg REGNO. */
19509 ix86_register_priority (int hard_regno
)
19511 /* ebp and r13 as the base always wants a displacement, r12 as the
19512 base always wants an index. So discourage their usage in an
19514 if (hard_regno
== R12_REG
|| hard_regno
== R13_REG
)
19516 if (hard_regno
== BP_REG
)
19518 /* New x86-64 int registers result in bigger code size. Discourage them. */
19519 if (REX_INT_REGNO_P (hard_regno
))
19521 /* New x86-64 SSE registers result in bigger code size. Discourage them. */
19522 if (REX_SSE_REGNO_P (hard_regno
))
19524 if (EXT_REX_SSE_REGNO_P (hard_regno
))
19526 /* Usage of AX register results in smaller code. Prefer it. */
19527 if (hard_regno
== AX_REG
)
19532 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
19534 Put float CONST_DOUBLE in the constant pool instead of fp regs.
19535 QImode must go into class Q_REGS.
19536 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
19537 movdf to do mem-to-mem moves through integer regs. */
19540 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
19542 machine_mode mode
= GET_MODE (x
);
19544 /* We're only allowed to return a subclass of CLASS. Many of the
19545 following checks fail for NO_REGS, so eliminate that early. */
19546 if (regclass
== NO_REGS
)
19549 /* All classes can load zeros. */
19550 if (x
== CONST0_RTX (mode
))
19553 /* Force constants into memory if we are loading a (nonzero) constant into
19554 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
19555 instructions to load from a constant. */
19557 && (MAYBE_MMX_CLASS_P (regclass
)
19558 || MAYBE_SSE_CLASS_P (regclass
)
19559 || MAYBE_MASK_CLASS_P (regclass
)))
19562 /* Floating-point constants need more complex checks. */
19563 if (CONST_DOUBLE_P (x
))
19565 /* General regs can load everything. */
19566 if (INTEGER_CLASS_P (regclass
))
19569 /* Floats can load 0 and 1 plus some others. Note that we eliminated
19570 zero above. We only want to wind up preferring 80387 registers if
19571 we plan on doing computation with them. */
19572 if (IS_STACK_MODE (mode
)
19573 && standard_80387_constant_p (x
) > 0)
19575 /* Limit class to FP regs. */
19576 if (FLOAT_CLASS_P (regclass
))
19583 /* Prefer SSE if we can use them for math. Also allow integer regs
19584 when moves between register units are cheap. */
19585 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19587 if (TARGET_INTER_UNIT_MOVES_FROM_VEC
19588 && TARGET_INTER_UNIT_MOVES_TO_VEC
19589 && GET_MODE_SIZE (mode
) <= GET_MODE_SIZE (word_mode
))
19590 return INT_SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
19592 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
19595 /* Generally when we see PLUS here, it's the function invariant
19596 (plus soft-fp const_int). Which can only be computed into general
19598 if (GET_CODE (x
) == PLUS
)
19599 return INTEGER_CLASS_P (regclass
) ? regclass
: NO_REGS
;
19601 /* QImode constants are easy to load, but non-constant QImode data
19602 must go into Q_REGS or ALL_MASK_REGS. */
19603 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
19605 if (Q_CLASS_P (regclass
))
19607 else if (reg_class_subset_p (Q_REGS
, regclass
))
19609 else if (MASK_CLASS_P (regclass
))
19618 /* Discourage putting floating-point values in SSE registers unless
19619 SSE math is being used, and likewise for the 387 registers. */
19621 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
19623 /* Restrict the output reload class to the register bank that we are doing
19624 math on. If we would like not to return a subset of CLASS, reject this
19625 alternative: if reload cannot do this, it will still use its choice. */
19626 machine_mode mode
= GET_MODE (x
);
19627 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19628 return MAYBE_SSE_CLASS_P (regclass
) ? ALL_SSE_REGS
: NO_REGS
;
19630 if (IS_STACK_MODE (mode
))
19631 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
19637 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
19638 machine_mode mode
, secondary_reload_info
*sri
)
19640 /* Double-word spills from general registers to non-offsettable memory
19641 references (zero-extended addresses) require special handling. */
19644 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
19645 && INTEGER_CLASS_P (rclass
)
19646 && !offsettable_memref_p (x
))
19649 ? CODE_FOR_reload_noff_load
19650 : CODE_FOR_reload_noff_store
);
19651 /* Add the cost of moving address to a temporary. */
19652 sri
->extra_cost
= 1;
19657 /* QImode spills from non-QI registers require
19658 intermediate register on 32bit targets. */
19660 && ((!TARGET_64BIT
&& !in_p
19661 && INTEGER_CLASS_P (rclass
)
19662 && MAYBE_NON_Q_CLASS_P (rclass
))
19663 || (!TARGET_AVX512DQ
19664 && MAYBE_MASK_CLASS_P (rclass
))))
19666 int regno
= true_regnum (x
);
19668 /* Return Q_REGS if the operand is in memory. */
19675 /* Require movement to gpr, and then store to memory. */
19676 if ((mode
== HFmode
|| mode
== HImode
|| mode
== V2QImode
19679 && SSE_CLASS_P (rclass
)
19680 && !in_p
&& MEM_P (x
))
19682 sri
->extra_cost
= 1;
19683 return GENERAL_REGS
;
19686 /* This condition handles corner case where an expression involving
19687 pointers gets vectorized. We're trying to use the address of a
19688 stack slot as a vector initializer.
19690 (set (reg:V2DI 74 [ vect_cst_.2 ])
19691 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
19693 Eventually frame gets turned into sp+offset like this:
19695 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
19696 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
19697 (const_int 392 [0x188]))))
19699 That later gets turned into:
19701 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
19702 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
19703 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
19705 We'll have the following reload recorded:
19707 Reload 0: reload_in (DI) =
19708 (plus:DI (reg/f:DI 7 sp)
19709 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
19710 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
19711 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
19712 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
19713 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
19714 reload_reg_rtx: (reg:V2DI 22 xmm1)
19716 Which isn't going to work since SSE instructions can't handle scalar
19717 additions. Returning GENERAL_REGS forces the addition into integer
19718 register and reload can handle subsequent reloads without problems. */
19720 if (in_p
&& GET_CODE (x
) == PLUS
19721 && SSE_CLASS_P (rclass
)
19722 && SCALAR_INT_MODE_P (mode
))
19723 return GENERAL_REGS
;
19728 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
19731 ix86_class_likely_spilled_p (reg_class_t rclass
)
19742 case SSE_FIRST_REG
:
19744 case FP_SECOND_REG
:
19754 /* Return true if a set of DST by the expression SRC should be allowed.
19755 This prevents complex sets of likely_spilled hard regs before reload. */
19758 ix86_hardreg_mov_ok (rtx dst
, rtx src
)
19760 /* Avoid complex sets of likely_spilled hard registers before reload. */
19761 if (REG_P (dst
) && HARD_REGISTER_P (dst
)
19762 && !REG_P (src
) && !MEM_P (src
)
19763 && !(VECTOR_MODE_P (GET_MODE (dst
))
19764 ? standard_sse_constant_p (src
, GET_MODE (dst
))
19765 : x86_64_immediate_operand (src
, GET_MODE (dst
)))
19766 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst
)))
19767 && !reload_completed
)
19772 /* If we are copying between registers from different register sets
19773 (e.g. FP and integer), we may need a memory location.
19775 The function can't work reliably when one of the CLASSES is a class
19776 containing registers from multiple sets. We avoid this by never combining
19777 different sets in a single alternative in the machine description.
19778 Ensure that this constraint holds to avoid unexpected surprises.
19780 When STRICT is false, we are being called from REGISTER_MOVE_COST,
19781 so do not enforce these sanity checks.
19783 To optimize register_move_cost performance, define inline variant. */
19786 inline_secondary_memory_needed (machine_mode mode
, reg_class_t class1
,
19787 reg_class_t class2
, int strict
)
19789 if (lra_in_progress
&& (class1
== NO_REGS
|| class2
== NO_REGS
))
19792 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
19793 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
19794 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
19795 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
19796 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
19797 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
)
19798 || MAYBE_MASK_CLASS_P (class1
) != MASK_CLASS_P (class1
)
19799 || MAYBE_MASK_CLASS_P (class2
) != MASK_CLASS_P (class2
))
19801 gcc_assert (!strict
|| lra_in_progress
);
19805 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
19808 /* ??? This is a lie. We do have moves between mmx/general, and for
19809 mmx/sse2. But by saying we need secondary memory we discourage the
19810 register allocator from using the mmx registers unless needed. */
19811 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
19814 /* Between mask and general, we have moves no larger than word size. */
19815 if (MASK_CLASS_P (class1
) != MASK_CLASS_P (class2
))
19817 if (!(INTEGER_CLASS_P (class1
) || INTEGER_CLASS_P (class2
))
19818 || GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
19822 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
19824 /* SSE1 doesn't have any direct moves from other classes. */
19828 if (!(INTEGER_CLASS_P (class1
) || INTEGER_CLASS_P (class2
)))
19831 int msize
= GET_MODE_SIZE (mode
);
19833 /* Between SSE and general, we have moves no larger than word size. */
19834 if (msize
> UNITS_PER_WORD
)
19837 /* In addition to SImode moves, HImode moves are supported for SSE2 and above,
19838 Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16. */
19839 int minsize
= GET_MODE_SIZE (TARGET_SSE2
? HImode
: SImode
);
19841 if (msize
< minsize
)
19844 /* If the target says that inter-unit moves are more expensive
19845 than moving through memory, then don't generate them. */
19846 if ((SSE_CLASS_P (class1
) && !TARGET_INTER_UNIT_MOVES_FROM_VEC
)
19847 || (SSE_CLASS_P (class2
) && !TARGET_INTER_UNIT_MOVES_TO_VEC
))
19854 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
19857 ix86_secondary_memory_needed (machine_mode mode
, reg_class_t class1
,
19858 reg_class_t class2
)
19860 return inline_secondary_memory_needed (mode
, class1
, class2
, true);
19863 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
19865 get_secondary_mem widens integral modes to BITS_PER_WORD.
19866 There is no need to emit full 64 bit move on 64 bit targets
19867 for integral modes that can be moved using 32 bit move. */
19869 static machine_mode
19870 ix86_secondary_memory_needed_mode (machine_mode mode
)
19872 if (GET_MODE_BITSIZE (mode
) < 32 && INTEGRAL_MODE_P (mode
))
19873 return mode_for_size (32, GET_MODE_CLASS (mode
), 0).require ();
19877 /* Implement the TARGET_CLASS_MAX_NREGS hook.
19879 On the 80386, this is the size of MODE in words,
19880 except in the FP regs, where a single reg is always enough. */
19882 static unsigned char
19883 ix86_class_max_nregs (reg_class_t rclass
, machine_mode mode
)
19885 if (MAYBE_INTEGER_CLASS_P (rclass
))
19887 if (mode
== XFmode
)
19888 return (TARGET_64BIT
? 2 : 3);
19889 else if (mode
== XCmode
)
19890 return (TARGET_64BIT
? 4 : 6);
19892 return CEIL (GET_MODE_SIZE (mode
), UNITS_PER_WORD
);
19896 if (COMPLEX_MODE_P (mode
))
19903 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
19906 ix86_can_change_mode_class (machine_mode from
, machine_mode to
,
19907 reg_class_t regclass
)
19912 /* x87 registers can't do subreg at all, as all values are reformatted
19913 to extended precision. */
19914 if (MAYBE_FLOAT_CLASS_P (regclass
))
19917 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
19919 /* Vector registers do not support QI or HImode loads. If we don't
19920 disallow a change to these modes, reload will assume it's ok to
19921 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
19922 the vec_dupv4hi pattern.
19923 NB: SSE2 can load 16bit data to sse register via pinsrw. */
19924 int mov_size
= MAYBE_SSE_CLASS_P (regclass
) && TARGET_SSE2
? 2 : 4;
19925 if (GET_MODE_SIZE (from
) < mov_size
19926 || GET_MODE_SIZE (to
) < mov_size
)
19933 /* Return index of MODE in the sse load/store tables. */
19936 sse_store_index (machine_mode mode
)
19938 /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
19939 costs to processor_costs, which requires changes to all entries in
19940 processor cost table. */
19941 if (mode
== E_HFmode
)
19944 switch (GET_MODE_SIZE (mode
))
19961 /* Return the cost of moving data of mode M between a
19962 register and memory. A value of 2 is the default; this cost is
19963 relative to those in `REGISTER_MOVE_COST'.
19965 This function is used extensively by register_move_cost that is used to
19966 build tables at startup. Make it inline in this case.
19967 When IN is 2, return maximum of in and out move cost.
19969 If moving between registers and memory is more expensive than
19970 between two registers, you should define this macro to express the
19973 Model also increased moving costs of QImode registers in non
19977 inline_memory_move_cost (machine_mode mode
, enum reg_class regclass
, int in
)
19981 if (FLOAT_CLASS_P (regclass
))
19999 return MAX (ix86_cost
->hard_register
.fp_load
[index
],
20000 ix86_cost
->hard_register
.fp_store
[index
]);
20001 return in
? ix86_cost
->hard_register
.fp_load
[index
]
20002 : ix86_cost
->hard_register
.fp_store
[index
];
20004 if (SSE_CLASS_P (regclass
))
20006 int index
= sse_store_index (mode
);
20010 return MAX (ix86_cost
->hard_register
.sse_load
[index
],
20011 ix86_cost
->hard_register
.sse_store
[index
]);
20012 return in
? ix86_cost
->hard_register
.sse_load
[index
]
20013 : ix86_cost
->hard_register
.sse_store
[index
];
20015 if (MASK_CLASS_P (regclass
))
20018 switch (GET_MODE_SIZE (mode
))
20026 /* DImode loads and stores assumed to cost the same as SImode. */
20036 return MAX (ix86_cost
->hard_register
.mask_load
[index
],
20037 ix86_cost
->hard_register
.mask_store
[index
]);
20038 return in
? ix86_cost
->hard_register
.mask_load
[2]
20039 : ix86_cost
->hard_register
.mask_store
[2];
20041 if (MMX_CLASS_P (regclass
))
20044 switch (GET_MODE_SIZE (mode
))
20056 return MAX (ix86_cost
->hard_register
.mmx_load
[index
],
20057 ix86_cost
->hard_register
.mmx_store
[index
]);
20058 return in
? ix86_cost
->hard_register
.mmx_load
[index
]
20059 : ix86_cost
->hard_register
.mmx_store
[index
];
20061 switch (GET_MODE_SIZE (mode
))
20064 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
20067 return ix86_cost
->hard_register
.int_store
[0];
20068 if (TARGET_PARTIAL_REG_DEPENDENCY
20069 && optimize_function_for_speed_p (cfun
))
20070 cost
= ix86_cost
->hard_register
.movzbl_load
;
20072 cost
= ix86_cost
->hard_register
.int_load
[0];
20074 return MAX (cost
, ix86_cost
->hard_register
.int_store
[0]);
20080 return MAX (ix86_cost
->hard_register
.movzbl_load
,
20081 ix86_cost
->hard_register
.int_store
[0] + 4);
20083 return ix86_cost
->hard_register
.movzbl_load
;
20085 return ix86_cost
->hard_register
.int_store
[0] + 4;
20092 cost
= MAX (ix86_cost
->hard_register
.int_load
[1],
20093 ix86_cost
->hard_register
.int_store
[1]);
20095 cost
= in
? ix86_cost
->hard_register
.int_load
[1]
20096 : ix86_cost
->hard_register
.int_store
[1];
20098 if (mode
== E_HFmode
)
20100 /* Prefer SSE over GPR for HFmode. */
20102 int index
= sse_store_index (mode
);
20104 sse_cost
= MAX (ix86_cost
->hard_register
.sse_load
[index
],
20105 ix86_cost
->hard_register
.sse_store
[index
]);
20108 ? ix86_cost
->hard_register
.sse_load
[index
]
20109 : ix86_cost
->hard_register
.sse_store
[index
]);
20110 if (sse_cost
>= cost
)
20111 cost
= sse_cost
+ 1;
20117 cost
= MAX (ix86_cost
->hard_register
.int_load
[2],
20118 ix86_cost
->hard_register
.int_store
[2]);
20120 cost
= ix86_cost
->hard_register
.int_load
[2];
20122 cost
= ix86_cost
->hard_register
.int_store
[2];
20123 /* Multiply with the number of GPR moves needed. */
20124 return cost
* CEIL ((int) GET_MODE_SIZE (mode
), UNITS_PER_WORD
);
20129 ix86_memory_move_cost (machine_mode mode
, reg_class_t regclass
, bool in
)
20131 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
20135 /* Return the cost of moving data from a register in class CLASS1 to
20136 one in class CLASS2.
20138 It is not required that the cost always equal 2 when FROM is the same as TO;
20139 on some machines it is expensive to move between registers if they are not
20140 general registers. */
20143 ix86_register_move_cost (machine_mode mode
, reg_class_t class1_i
,
20144 reg_class_t class2_i
)
20146 enum reg_class class1
= (enum reg_class
) class1_i
;
20147 enum reg_class class2
= (enum reg_class
) class2_i
;
20149 /* In case we require secondary memory, compute cost of the store followed
20150 by load. In order to avoid bad register allocation choices, we need
20151 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
20153 if (inline_secondary_memory_needed (mode
, class1
, class2
, false))
20157 cost
+= inline_memory_move_cost (mode
, class1
, 2);
20158 cost
+= inline_memory_move_cost (mode
, class2
, 2);
20160 /* In case of copying from general_purpose_register we may emit multiple
20161 stores followed by single load causing memory size mismatch stall.
20162 Count this as arbitrarily high cost of 20. */
20163 if (GET_MODE_BITSIZE (mode
) > BITS_PER_WORD
20164 && TARGET_MEMORY_MISMATCH_STALL
20165 && targetm
.class_max_nregs (class1
, mode
)
20166 > targetm
.class_max_nregs (class2
, mode
))
20169 /* In the case of FP/MMX moves, the registers actually overlap, and we
20170 have to switch modes in order to treat them differently. */
20171 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
20172 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
20178 /* Moves between MMX and non-MMX units require secondary memory. */
20179 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
20180 gcc_unreachable ();
20182 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
20183 return (SSE_CLASS_P (class1
)
20184 ? ix86_cost
->hard_register
.sse_to_integer
20185 : ix86_cost
->hard_register
.integer_to_sse
);
20187 /* Moves between mask register and GPR. */
20188 if (MASK_CLASS_P (class1
) != MASK_CLASS_P (class2
))
20190 return (MASK_CLASS_P (class1
)
20191 ? ix86_cost
->hard_register
.mask_to_integer
20192 : ix86_cost
->hard_register
.integer_to_mask
);
20194 /* Moving between mask registers. */
20195 if (MASK_CLASS_P (class1
) && MASK_CLASS_P (class2
))
20196 return ix86_cost
->hard_register
.mask_move
;
20198 if (MAYBE_FLOAT_CLASS_P (class1
))
20199 return ix86_cost
->hard_register
.fp_move
;
20200 if (MAYBE_SSE_CLASS_P (class1
))
20202 if (GET_MODE_BITSIZE (mode
) <= 128)
20203 return ix86_cost
->hard_register
.xmm_move
;
20204 if (GET_MODE_BITSIZE (mode
) <= 256)
20205 return ix86_cost
->hard_register
.ymm_move
;
20206 return ix86_cost
->hard_register
.zmm_move
;
20208 if (MAYBE_MMX_CLASS_P (class1
))
20209 return ix86_cost
->hard_register
.mmx_move
;
20213 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
20214 words of a value of mode MODE but can be less for certain modes in
20215 special long registers.
20217 Actually there are no two word move instructions for consecutive
20218 registers. And only registers 0-3 may have mov byte instructions
20219 applied to them. */
20221 static unsigned int
20222 ix86_hard_regno_nregs (unsigned int regno
, machine_mode mode
)
20224 if (GENERAL_REGNO_P (regno
))
20226 if (mode
== XFmode
)
20227 return TARGET_64BIT
? 2 : 3;
20228 if (mode
== XCmode
)
20229 return TARGET_64BIT
? 4 : 6;
20230 return CEIL (GET_MODE_SIZE (mode
), UNITS_PER_WORD
);
20232 if (COMPLEX_MODE_P (mode
))
20234 /* Register pair for mask registers. */
20235 if (mode
== P2QImode
|| mode
== P2HImode
)
20237 if (mode
== V64SFmode
|| mode
== V64SImode
)
20242 /* Implement REGMODE_NATURAL_SIZE(MODE). */
20244 ix86_regmode_natural_size (machine_mode mode
)
20246 if (mode
== P2HImode
|| mode
== P2QImode
)
20247 return GET_MODE_SIZE (mode
) / 2;
20248 return UNITS_PER_WORD
;
20251 /* Implement TARGET_HARD_REGNO_MODE_OK. */
20254 ix86_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
20256 /* Flags and only flags can only hold CCmode values. */
20257 if (CC_REGNO_P (regno
))
20258 return GET_MODE_CLASS (mode
) == MODE_CC
;
20259 if (GET_MODE_CLASS (mode
) == MODE_CC
20260 || GET_MODE_CLASS (mode
) == MODE_RANDOM
)
20262 if (STACK_REGNO_P (regno
))
20263 return VALID_FP_MODE_P (mode
);
20264 if (MASK_REGNO_P (regno
))
20266 /* Register pair only starts at even register number. */
20267 if ((mode
== P2QImode
|| mode
== P2HImode
))
20268 return MASK_PAIR_REGNO_P(regno
);
20270 return ((TARGET_AVX512F
&& VALID_MASK_REG_MODE (mode
))
20271 || (TARGET_AVX512BW
20272 && VALID_MASK_AVX512BW_MODE (mode
)));
20275 if (GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
20278 if (SSE_REGNO_P (regno
))
20280 /* We implement the move patterns for all vector modes into and
20281 out of SSE registers, even when no operation instructions
20284 /* For AVX-512 we allow, regardless of regno:
20286 - any of 512-bit wide vector mode
20287 - any scalar mode. */
20289 && (VALID_AVX512F_REG_OR_XI_MODE (mode
)
20290 || VALID_AVX512F_SCALAR_MODE (mode
)))
20293 /* For AVX-5124FMAPS or AVX-5124VNNIW
20294 allow V64SF and V64SI modes for special regnos. */
20295 if ((TARGET_AVX5124FMAPS
|| TARGET_AVX5124VNNIW
)
20296 && (mode
== V64SFmode
|| mode
== V64SImode
)
20297 && MOD4_SSE_REGNO_P (regno
))
20300 /* TODO check for QI/HI scalars. */
20301 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
20302 if (TARGET_AVX512VL
20303 && (VALID_AVX256_REG_OR_OI_MODE (mode
)
20304 || VALID_AVX512VL_128_REG_MODE (mode
)))
20307 /* xmm16-xmm31 are only available for AVX-512. */
20308 if (EXT_REX_SSE_REGNO_P (regno
))
20311 /* Use pinsrw/pextrw to mov 16-bit data from/to sse to/from integer. */
20312 if (TARGET_SSE2
&& mode
== HImode
)
20315 /* OImode and AVX modes are available only when AVX is enabled. */
20316 return ((TARGET_AVX
20317 && VALID_AVX256_REG_OR_OI_MODE (mode
))
20318 || VALID_SSE_REG_MODE (mode
)
20319 || VALID_SSE2_REG_MODE (mode
)
20320 || VALID_MMX_REG_MODE (mode
)
20321 || VALID_MMX_REG_MODE_3DNOW (mode
));
20323 if (MMX_REGNO_P (regno
))
20325 /* We implement the move patterns for 3DNOW modes even in MMX mode,
20326 so if the register is available at all, then we can move data of
20327 the given mode into or out of it. */
20328 return (VALID_MMX_REG_MODE (mode
)
20329 || VALID_MMX_REG_MODE_3DNOW (mode
));
20332 if (mode
== QImode
)
20334 /* Take care for QImode values - they can be in non-QI regs,
20335 but then they do cause partial register stalls. */
20336 if (ANY_QI_REGNO_P (regno
))
20338 if (!TARGET_PARTIAL_REG_STALL
)
20340 /* LRA checks if the hard register is OK for the given mode.
20341 QImode values can live in non-QI regs, so we allow all
20343 if (lra_in_progress
)
20345 return !can_create_pseudo_p ();
20347 /* We handle both integer and floats in the general purpose registers. */
20348 else if (VALID_INT_MODE_P (mode
)
20349 || VALID_FP_MODE_P (mode
))
20351 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
20352 on to use that value in smaller contexts, this can easily force a
20353 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
20354 supporting DImode, allow it. */
20355 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
20361 /* Implement TARGET_INSN_CALLEE_ABI. */
20363 const predefined_function_abi
&
20364 ix86_insn_callee_abi (const rtx_insn
*insn
)
20366 unsigned int abi_id
= 0;
20367 rtx pat
= PATTERN (insn
);
20368 if (vzeroupper_pattern (pat
, VOIDmode
))
20369 abi_id
= ABI_VZEROUPPER
;
20371 return function_abis
[abi_id
];
20374 /* Initialize function_abis with corresponding abi_id,
20375 currently only handle vzeroupper. */
20377 ix86_initialize_callee_abi (unsigned int abi_id
)
20379 gcc_assert (abi_id
== ABI_VZEROUPPER
);
20380 predefined_function_abi
&vzeroupper_abi
= function_abis
[abi_id
];
20381 if (!vzeroupper_abi
.initialized_p ())
20383 HARD_REG_SET full_reg_clobbers
;
20384 CLEAR_HARD_REG_SET (full_reg_clobbers
);
20385 vzeroupper_abi
.initialize (ABI_VZEROUPPER
, full_reg_clobbers
);
20390 ix86_expand_avx_vzeroupper (void)
20392 /* Initialize vzeroupper_abi here. */
20393 ix86_initialize_callee_abi (ABI_VZEROUPPER
);
20394 rtx_insn
*insn
= emit_call_insn (gen_avx_vzeroupper_callee_abi ());
20395 /* Return false for non-local goto in can_nonlocal_goto. */
20396 make_reg_eh_region_note (insn
, 0, INT_MIN
);
20397 /* Flag used for call_insn indicates it's a fake call. */
20398 RTX_FLAG (insn
, used
) = 1;
20402 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
20403 saves SSE registers across calls is Win64 (thus no need to check the
20404 current ABI here), and with AVX enabled Win64 only guarantees that
20405 the low 16 bytes are saved. */
20408 ix86_hard_regno_call_part_clobbered (unsigned int abi_id
, unsigned int regno
,
20411 /* Special ABI for vzeroupper which only clobber higher part of sse regs. */
20412 if (abi_id
== ABI_VZEROUPPER
)
20413 return (GET_MODE_SIZE (mode
) > 16
20414 && ((TARGET_64BIT
&& REX_SSE_REGNO_P (regno
))
20415 || LEGACY_SSE_REGNO_P (regno
)));
20417 return SSE_REGNO_P (regno
) && GET_MODE_SIZE (mode
) > 16;
20420 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
20421 tieable integer mode. */
20424 ix86_tieable_integer_mode_p (machine_mode mode
)
20433 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
20436 return TARGET_64BIT
;
20443 /* Implement TARGET_MODES_TIEABLE_P.
20445 Return true if MODE1 is accessible in a register that can hold MODE2
20446 without copying. That is, all register classes that can hold MODE2
20447 can also hold MODE1. */
20450 ix86_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
20452 if (mode1
== mode2
)
20455 if (ix86_tieable_integer_mode_p (mode1
)
20456 && ix86_tieable_integer_mode_p (mode2
))
20459 /* MODE2 being XFmode implies fp stack or general regs, which means we
20460 can tie any smaller floating point modes to it. Note that we do not
20461 tie this with TFmode. */
20462 if (mode2
== XFmode
)
20463 return mode1
== SFmode
|| mode1
== DFmode
;
20465 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
20466 that we can tie it with SFmode. */
20467 if (mode2
== DFmode
)
20468 return mode1
== SFmode
;
20470 /* If MODE2 is only appropriate for an SSE register, then tie with
20471 any other mode acceptable to SSE registers. */
20472 if (GET_MODE_SIZE (mode2
) == 64
20473 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
20474 return (GET_MODE_SIZE (mode1
) == 64
20475 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
20476 if (GET_MODE_SIZE (mode2
) == 32
20477 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
20478 return (GET_MODE_SIZE (mode1
) == 32
20479 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
20480 if (GET_MODE_SIZE (mode2
) == 16
20481 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
20482 return (GET_MODE_SIZE (mode1
) == 16
20483 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
20485 /* If MODE2 is appropriate for an MMX register, then tie
20486 with any other mode acceptable to MMX registers. */
20487 if (GET_MODE_SIZE (mode2
) == 8
20488 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
20489 return (GET_MODE_SIZE (mode1
) == 8
20490 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
20492 /* SCmode and DImode can be tied. */
20493 if ((mode1
== E_SCmode
&& mode2
== E_DImode
)
20494 || (mode1
== E_DImode
&& mode2
== E_SCmode
))
20495 return TARGET_64BIT
;
20497 /* [SD]Cmode and V2[SD]Fmode modes can be tied. */
20498 if ((mode1
== E_SCmode
&& mode2
== E_V2SFmode
)
20499 || (mode1
== E_V2SFmode
&& mode2
== E_SCmode
)
20500 || (mode1
== E_DCmode
&& mode2
== E_V2DFmode
)
20501 || (mode1
== E_V2DFmode
&& mode2
== E_DCmode
))
20507 /* Return the cost of moving between two registers of mode MODE. */
20510 ix86_set_reg_reg_cost (machine_mode mode
)
20512 unsigned int units
= UNITS_PER_WORD
;
20514 switch (GET_MODE_CLASS (mode
))
20520 units
= GET_MODE_SIZE (CCmode
);
20524 if ((TARGET_SSE
&& mode
== TFmode
)
20525 || (TARGET_80387
&& mode
== XFmode
)
20526 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DFmode
)
20527 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SFmode
))
20528 units
= GET_MODE_SIZE (mode
);
20531 case MODE_COMPLEX_FLOAT
:
20532 if ((TARGET_SSE
&& mode
== TCmode
)
20533 || (TARGET_80387
&& mode
== XCmode
)
20534 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DCmode
)
20535 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SCmode
))
20536 units
= GET_MODE_SIZE (mode
);
20539 case MODE_VECTOR_INT
:
20540 case MODE_VECTOR_FLOAT
:
20541 if ((TARGET_AVX512F
&& VALID_AVX512F_REG_MODE (mode
))
20542 || (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
20543 || (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
20544 || (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
20545 || ((TARGET_MMX
|| TARGET_MMX_WITH_SSE
)
20546 && VALID_MMX_REG_MODE (mode
)))
20547 units
= GET_MODE_SIZE (mode
);
20550 /* Return the cost of moving between two registers of mode MODE,
20551 assuming that the move will be in pieces of at most UNITS bytes. */
20552 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode
), units
));
20555 /* Return cost of vector operation in MODE given that scalar version has
20559 ix86_vec_cost (machine_mode mode
, int cost
)
20561 if (!VECTOR_MODE_P (mode
))
20564 if (GET_MODE_BITSIZE (mode
) == 128
20565 && TARGET_SSE_SPLIT_REGS
)
20566 return cost
* GET_MODE_BITSIZE (mode
) / 64;
20567 else if (GET_MODE_BITSIZE (mode
) > 128
20568 && TARGET_AVX256_SPLIT_REGS
)
20569 return cost
* GET_MODE_BITSIZE (mode
) / 128;
20570 else if (GET_MODE_BITSIZE (mode
) > 256
20571 && TARGET_AVX512_SPLIT_REGS
)
20572 return cost
* GET_MODE_BITSIZE (mode
) / 256;
20576 /* Return cost of vec_widen_<s>mult_hi/lo_<mode>,
20577 vec_widen_<s>mul_hi/lo_<mode> is only available for VI124_AVX2. */
20579 ix86_widen_mult_cost (const struct processor_costs
*cost
,
20580 enum machine_mode mode
, bool uns_p
)
20582 gcc_assert (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
);
20583 int extra_cost
= 0;
20584 int basic_cost
= 0;
20589 if (!uns_p
|| mode
== V16HImode
)
20590 extra_cost
= cost
->sse_op
* 2;
20591 basic_cost
= cost
->mulss
* 2 + cost
->sse_op
* 4;
20595 /* pmulhw/pmullw can be used. */
20596 basic_cost
= cost
->mulss
* 2 + cost
->sse_op
* 2;
20599 /* pmuludq under sse2, pmuldq under sse4.1, for sign_extend,
20600 require extra 4 mul, 4 add, 4 cmp and 2 shift. */
20601 if (!TARGET_SSE4_1
&& !uns_p
)
20602 extra_cost
= (cost
->mulss
+ cost
->addss
+ cost
->sse_op
) * 4
20603 + cost
->sse_op
* 2;
20606 basic_cost
= cost
->mulss
* 2 + cost
->sse_op
* 4;
20609 /* Not implemented. */
20612 return ix86_vec_cost (mode
, basic_cost
+ extra_cost
);
20615 /* Return cost of multiplication in MODE. */
20618 ix86_multiplication_cost (const struct processor_costs
*cost
,
20619 enum machine_mode mode
)
20621 machine_mode inner_mode
= mode
;
20622 if (VECTOR_MODE_P (mode
))
20623 inner_mode
= GET_MODE_INNER (mode
);
20625 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode
))
20626 return inner_mode
== DFmode
? cost
->mulsd
: cost
->mulss
;
20627 else if (X87_FLOAT_MODE_P (mode
))
20629 else if (FLOAT_MODE_P (mode
))
20630 return ix86_vec_cost (mode
,
20631 inner_mode
== DFmode
? cost
->mulsd
: cost
->mulss
);
20632 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
20635 /* Cost of reading the memory. */
20642 /* Partial V*QImode is emulated with 4-6 insns. */
20647 if (TARGET_AVX512BW
&& TARGET_AVX512VL
)
20649 else if (TARGET_AVX2
)
20651 else if (TARGET_XOP
)
20652 extra
+= cost
->sse_load
[2];
20656 extra
+= cost
->sse_load
[2];
20661 /* V*QImode is emulated with 4-11 insns. */
20666 if (TARGET_AVX2
&& !TARGET_PREFER_AVX128
)
20668 if (!(TARGET_AVX512BW
&& TARGET_AVX512VL
))
20671 else if (TARGET_XOP
)
20675 extra
+= cost
->sse_load
[2];
20681 extra
+= cost
->sse_load
[2];
20690 if (!TARGET_AVX512BW
|| TARGET_PREFER_AVX256
)
20694 extra
+= cost
->sse_load
[3] * 2;
20701 extra
= cost
->sse_load
[3] * 2 + cost
->sse_load
[4] * 2;
20704 return ix86_vec_cost (mode
, cost
->mulss
* nmults
20705 + cost
->sse_op
* nops
) + extra
;
20708 /* pmulld is used in this case. No emulation is needed. */
20711 /* V4SImode is emulated with 7 insns. */
20713 return ix86_vec_cost (mode
, cost
->mulss
* 2 + cost
->sse_op
* 5);
20717 /* vpmullq is used in this case. No emulation is needed. */
20718 if (TARGET_AVX512DQ
&& TARGET_AVX512VL
)
20720 /* V*DImode is emulated with 6-8 insns. */
20721 else if (TARGET_XOP
&& mode
== V2DImode
)
20722 return ix86_vec_cost (mode
, cost
->mulss
* 2 + cost
->sse_op
* 4);
20725 /* vpmullq is used in this case. No emulation is needed. */
20726 if (TARGET_AVX512DQ
&& mode
== V8DImode
)
20729 return ix86_vec_cost (mode
, cost
->mulss
* 3 + cost
->sse_op
* 5);
20733 return ix86_vec_cost (mode
, cost
->mulss
);
20737 return (cost
->mult_init
[MODE_INDEX (mode
)] + cost
->mult_bit
* 7);
20740 /* Return cost of multiplication in MODE. */
20743 ix86_division_cost (const struct processor_costs
*cost
,
20744 enum machine_mode mode
)
20746 machine_mode inner_mode
= mode
;
20747 if (VECTOR_MODE_P (mode
))
20748 inner_mode
= GET_MODE_INNER (mode
);
20750 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode
))
20751 return inner_mode
== DFmode
? cost
->divsd
: cost
->divss
;
20752 else if (X87_FLOAT_MODE_P (mode
))
20754 else if (FLOAT_MODE_P (mode
))
20755 return ix86_vec_cost (mode
,
20756 inner_mode
== DFmode
? cost
->divsd
: cost
->divss
);
20758 return cost
->divide
[MODE_INDEX (mode
)];
20761 /* Return cost of shift in MODE.
20762 If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
20763 AND_IN_OP1 specify in op1 is result of AND and SHIFT_AND_TRUNCATE
20764 if op1 is a result of subreg.
20766 SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */
20769 ix86_shift_rotate_cost (const struct processor_costs
*cost
,
20770 enum rtx_code code
,
20771 enum machine_mode mode
, bool constant_op1
,
20772 HOST_WIDE_INT op1_val
,
20774 bool shift_and_truncate
,
20775 bool *skip_op0
, bool *skip_op1
)
20778 *skip_op0
= *skip_op1
= false;
20780 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
20783 /* Cost of reading the memory. */
20791 /* Use vpbroadcast. */
20792 extra
= cost
->sse_op
;
20794 extra
= cost
->sse_load
[2];
20798 if (code
== ASHIFTRT
)
20806 else if (TARGET_AVX512BW
&& TARGET_AVX512VL
)
20807 return ix86_vec_cost (mode
, cost
->sse_op
* 4);
20808 else if (TARGET_SSE4_1
)
20810 else if (code
== ASHIFTRT
)
20814 return ix86_vec_cost (mode
, cost
->sse_op
* count
) + extra
;
20819 /* For XOP we use vpshab, which requires a broadcast of the
20820 value to the variable shift insn. For constants this
20821 means a V16Q const in mem; even when we can perform the
20822 shift with one insn set the cost to prefer paddb. */
20825 extra
= cost
->sse_load
[2];
20826 return ix86_vec_cost (mode
, cost
->sse_op
) + extra
;
20830 count
= (code
== ASHIFT
) ? 3 : 4;
20831 return ix86_vec_cost (mode
, cost
->sse_op
* count
);
20837 /* Use vpbroadcast. */
20838 extra
= cost
->sse_op
;
20840 extra
= (mode
== V16QImode
) ? cost
->sse_load
[2] : cost
->sse_load
[3];
20844 if (code
== ASHIFTRT
)
20852 else if (TARGET_AVX512BW
20853 && ((mode
== V32QImode
&& !TARGET_PREFER_AVX256
)
20854 || (mode
== V16QImode
&& TARGET_AVX512VL
20855 && !TARGET_PREFER_AVX128
)))
20856 return ix86_vec_cost (mode
, cost
->sse_op
* 4);
20857 else if (TARGET_AVX2
20858 && mode
== V16QImode
&& !TARGET_PREFER_AVX128
)
20860 else if (TARGET_SSE4_1
)
20862 else if (code
== ASHIFTRT
)
20866 return ix86_vec_cost (mode
, cost
->sse_op
* count
) + extra
;
20870 /* V*DImode arithmetic right shift is emulated. */
20871 if (code
== ASHIFTRT
&& !TARGET_AVX512VL
)
20876 count
= TARGET_SSE4_2
? 1 : 2;
20877 else if (TARGET_XOP
)
20879 else if (TARGET_SSE4_1
)
20884 else if (TARGET_XOP
)
20886 else if (TARGET_SSE4_2
)
20891 return ix86_vec_cost (mode
, cost
->sse_op
* count
);
20895 return ix86_vec_cost (mode
, cost
->sse_op
);
20899 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
20904 return cost
->shift_const
+ COSTS_N_INSNS (2);
20906 return cost
->shift_const
* 2;
20911 return cost
->shift_var
* 2;
20913 return cost
->shift_var
* 6 + COSTS_N_INSNS (2);
20919 return cost
->shift_const
;
20920 else if (shift_and_truncate
)
20923 *skip_op0
= *skip_op1
= true;
20924 /* Return the cost after shift-and truncation. */
20925 return cost
->shift_var
;
20928 return cost
->shift_var
;
20932 /* Compute a (partial) cost for rtx X. Return true if the complete
20933 cost has been computed, and false if subexpressions should be
20934 scanned. In either case, *TOTAL contains the cost result. */
20937 ix86_rtx_costs (rtx x
, machine_mode mode
, int outer_code_i
, int opno
,
20938 int *total
, bool speed
)
20941 enum rtx_code code
= GET_CODE (x
);
20942 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
20943 const struct processor_costs
*cost
20944 = speed
? ix86_tune_cost
: &ix86_size_cost
;
20950 if (register_operand (SET_DEST (x
), VOIDmode
)
20951 && register_operand (SET_SRC (x
), VOIDmode
))
20953 *total
= ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x
)));
20957 if (register_operand (SET_SRC (x
), VOIDmode
))
20958 /* Avoid potentially incorrect high cost from rtx_costs
20959 for non-tieable SUBREGs. */
20963 src_cost
= rtx_cost (SET_SRC (x
), mode
, SET
, 1, speed
);
20965 if (CONSTANT_P (SET_SRC (x
)))
20966 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
20967 a small value, possibly zero for cheap constants. */
20968 src_cost
+= COSTS_N_INSNS (1);
20971 *total
= src_cost
+ rtx_cost (SET_DEST (x
), mode
, SET
, 0, speed
);
20978 if (x86_64_immediate_operand (x
, VOIDmode
))
20985 if (IS_STACK_MODE (mode
))
20986 switch (standard_80387_constant_p (x
))
20994 default: /* Other constants */
21001 switch (standard_sse_constant_p (x
, mode
))
21005 case 1: /* 0: xor eliminates false dependency */
21008 default: /* -1: cmp contains false dependency */
21014 case CONST_WIDE_INT
:
21015 /* Fall back to (MEM (SYMBOL_REF)), since that's where
21016 it'll probably end up. Add a penalty for size. */
21017 *total
= (COSTS_N_INSNS (1)
21018 + (!TARGET_64BIT
&& flag_pic
)
21019 + (GET_MODE_SIZE (mode
) <= 4
21020 ? 0 : GET_MODE_SIZE (mode
) <= 8 ? 1 : 2));
21024 /* The zero extensions is often completely free on x86_64, so make
21025 it as cheap as possible. */
21026 if (TARGET_64BIT
&& mode
== DImode
21027 && GET_MODE (XEXP (x
, 0)) == SImode
)
21029 else if (TARGET_ZERO_EXTEND_WITH_AND
)
21030 *total
= cost
->add
;
21032 *total
= cost
->movzx
;
21036 *total
= cost
->movsx
;
21040 if (SCALAR_INT_MODE_P (mode
)
21041 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
21042 && CONST_INT_P (XEXP (x
, 1)))
21044 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
21047 *total
= cost
->add
;
21050 if ((value
== 2 || value
== 3)
21051 && cost
->lea
<= cost
->shift_const
)
21053 *total
= cost
->lea
;
21063 bool skip_op0
, skip_op1
;
21064 *total
= ix86_shift_rotate_cost (cost
, code
, mode
,
21065 CONSTANT_P (XEXP (x
, 1)),
21066 CONST_INT_P (XEXP (x
, 1))
21067 ? INTVAL (XEXP (x
, 1)) : -1,
21068 GET_CODE (XEXP (x
, 1)) == AND
,
21069 SUBREG_P (XEXP (x
, 1))
21070 && GET_CODE (XEXP (XEXP (x
, 1),
21072 &skip_op0
, &skip_op1
);
21073 if (skip_op0
|| skip_op1
)
21076 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
21078 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed
);
21087 gcc_assert (FLOAT_MODE_P (mode
));
21088 gcc_assert (TARGET_FMA
|| TARGET_FMA4
|| TARGET_AVX512F
);
21090 *total
= ix86_vec_cost (mode
,
21091 GET_MODE_INNER (mode
) == SFmode
21092 ? cost
->fmass
: cost
->fmasd
);
21093 *total
+= rtx_cost (XEXP (x
, 1), mode
, FMA
, 1, speed
);
21095 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
21097 if (GET_CODE (sub
) == NEG
)
21098 sub
= XEXP (sub
, 0);
21099 *total
+= rtx_cost (sub
, mode
, FMA
, 0, speed
);
21102 if (GET_CODE (sub
) == NEG
)
21103 sub
= XEXP (sub
, 0);
21104 *total
+= rtx_cost (sub
, mode
, FMA
, 2, speed
);
21109 if (!FLOAT_MODE_P (mode
) && !VECTOR_MODE_P (mode
))
21111 rtx op0
= XEXP (x
, 0);
21112 rtx op1
= XEXP (x
, 1);
21114 if (CONST_INT_P (XEXP (x
, 1)))
21116 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
21117 for (nbits
= 0; value
!= 0; value
&= value
- 1)
21121 /* This is arbitrary. */
21124 /* Compute costs correctly for widening multiplication. */
21125 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
21126 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
21127 == GET_MODE_SIZE (mode
))
21129 int is_mulwiden
= 0;
21130 machine_mode inner_mode
= GET_MODE (op0
);
21132 if (GET_CODE (op0
) == GET_CODE (op1
))
21133 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
21134 else if (CONST_INT_P (op1
))
21136 if (GET_CODE (op0
) == SIGN_EXTEND
)
21137 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
21140 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
21144 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
21148 // Double word multiplication requires 3 mults and 2 adds.
21149 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
21151 mult_init
= 3 * cost
->mult_init
[MODE_INDEX (word_mode
)]
21155 else mult_init
= cost
->mult_init
[MODE_INDEX (mode
)];
21157 *total
= (mult_init
21158 + nbits
* cost
->mult_bit
21159 + rtx_cost (op0
, mode
, outer_code
, opno
, speed
)
21160 + rtx_cost (op1
, mode
, outer_code
, opno
, speed
));
21164 *total
= ix86_multiplication_cost (cost
, mode
);
21171 *total
= ix86_division_cost (cost
, mode
);
21175 if (GET_MODE_CLASS (mode
) == MODE_INT
21176 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
)
21178 if (GET_CODE (XEXP (x
, 0)) == PLUS
21179 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
21180 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
21181 && CONSTANT_P (XEXP (x
, 1)))
21183 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
21184 if (val
== 2 || val
== 4 || val
== 8)
21186 *total
= cost
->lea
;
21187 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), mode
,
21188 outer_code
, opno
, speed
);
21189 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
21190 outer_code
, opno
, speed
);
21191 *total
+= rtx_cost (XEXP (x
, 1), mode
,
21192 outer_code
, opno
, speed
);
21196 else if (GET_CODE (XEXP (x
, 0)) == MULT
21197 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
21199 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
21200 if (val
== 2 || val
== 4 || val
== 8)
21202 *total
= cost
->lea
;
21203 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
21204 outer_code
, opno
, speed
);
21205 *total
+= rtx_cost (XEXP (x
, 1), mode
,
21206 outer_code
, opno
, speed
);
21210 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
21212 rtx op
= XEXP (XEXP (x
, 0), 0);
21214 /* Add with carry, ignore the cost of adding a carry flag. */
21215 if (ix86_carry_flag_operator (op
, mode
)
21216 || ix86_carry_flag_unset_operator (op
, mode
))
21217 *total
= cost
->add
;
21220 *total
= cost
->lea
;
21221 *total
+= rtx_cost (op
, mode
,
21222 outer_code
, opno
, speed
);
21225 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), mode
,
21226 outer_code
, opno
, speed
);
21227 *total
+= rtx_cost (XEXP (x
, 1), mode
,
21228 outer_code
, opno
, speed
);
21235 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
21236 if (GET_MODE_CLASS (mode
) == MODE_INT
21237 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
21238 && GET_CODE (XEXP (x
, 0)) == MINUS
21239 && (ix86_carry_flag_operator (XEXP (XEXP (x
, 0), 1), mode
)
21240 || ix86_carry_flag_unset_operator (XEXP (XEXP (x
, 0), 1), mode
)))
21242 *total
= cost
->add
;
21243 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
21244 outer_code
, opno
, speed
);
21245 *total
+= rtx_cost (XEXP (x
, 1), mode
,
21246 outer_code
, opno
, speed
);
21250 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode
))
21251 *total
= cost
->addss
;
21252 else if (X87_FLOAT_MODE_P (mode
))
21253 *total
= cost
->fadd
;
21254 else if (FLOAT_MODE_P (mode
))
21255 *total
= ix86_vec_cost (mode
, cost
->addss
);
21256 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
21257 *total
= ix86_vec_cost (mode
, cost
->sse_op
);
21258 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
21259 *total
= cost
->add
* 2;
21261 *total
= cost
->add
;
21265 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
21266 || SSE_FLOAT_MODE_P (mode
))
21268 /* (ior (not ...) ...) can be a single insn in AVX512. */
21269 if (GET_CODE (XEXP (x
, 0)) == NOT
&& TARGET_AVX512F
21270 && (GET_MODE_SIZE (mode
) == 64
21271 || (TARGET_AVX512VL
21272 && (GET_MODE_SIZE (mode
) == 32
21273 || GET_MODE_SIZE (mode
) == 16))))
21275 rtx right
= GET_CODE (XEXP (x
, 1)) != NOT
21276 ? XEXP (x
, 1) : XEXP (XEXP (x
, 1), 0);
21278 *total
= ix86_vec_cost (mode
, cost
->sse_op
)
21279 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
21280 outer_code
, opno
, speed
)
21281 + rtx_cost (right
, mode
, outer_code
, opno
, speed
);
21284 *total
= ix86_vec_cost (mode
, cost
->sse_op
);
21286 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
21287 *total
= cost
->add
* 2;
21289 *total
= cost
->add
;
21293 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
21294 || SSE_FLOAT_MODE_P (mode
))
21295 *total
= ix86_vec_cost (mode
, cost
->sse_op
);
21296 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
21297 *total
= cost
->add
* 2;
21299 *total
= cost
->add
;
21303 if (address_no_seg_operand (x
, mode
))
21305 *total
= cost
->lea
;
21308 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
21309 || SSE_FLOAT_MODE_P (mode
))
21311 /* pandn is a single instruction. */
21312 if (GET_CODE (XEXP (x
, 0)) == NOT
)
21314 rtx right
= XEXP (x
, 1);
21316 /* (and (not ...) (not ...)) can be a single insn in AVX512. */
21317 if (GET_CODE (right
) == NOT
&& TARGET_AVX512F
21318 && (GET_MODE_SIZE (mode
) == 64
21319 || (TARGET_AVX512VL
21320 && (GET_MODE_SIZE (mode
) == 32
21321 || GET_MODE_SIZE (mode
) == 16))))
21322 right
= XEXP (right
, 0);
21324 *total
= ix86_vec_cost (mode
, cost
->sse_op
)
21325 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
21326 outer_code
, opno
, speed
)
21327 + rtx_cost (right
, mode
, outer_code
, opno
, speed
);
21330 else if (GET_CODE (XEXP (x
, 1)) == NOT
)
21332 *total
= ix86_vec_cost (mode
, cost
->sse_op
)
21333 + rtx_cost (XEXP (x
, 0), mode
,
21334 outer_code
, opno
, speed
)
21335 + rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
21336 outer_code
, opno
, speed
);
21339 *total
= ix86_vec_cost (mode
, cost
->sse_op
);
21341 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
21343 if (TARGET_BMI
&& GET_CODE (XEXP (x
,0)) == NOT
)
21345 *total
= cost
->add
* 2
21346 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
21347 outer_code
, opno
, speed
)
21348 + rtx_cost (XEXP (x
, 1), mode
,
21349 outer_code
, opno
, speed
);
21352 else if (TARGET_BMI
&& GET_CODE (XEXP (x
, 1)) == NOT
)
21354 *total
= cost
->add
* 2
21355 + rtx_cost (XEXP (x
, 0), mode
,
21356 outer_code
, opno
, speed
)
21357 + rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
21358 outer_code
, opno
, speed
);
21361 *total
= cost
->add
* 2;
21363 else if (TARGET_BMI
&& GET_CODE (XEXP (x
,0)) == NOT
)
21366 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
21367 outer_code
, opno
, speed
)
21368 + rtx_cost (XEXP (x
, 1), mode
, outer_code
, opno
, speed
);
21371 else if (TARGET_BMI
&& GET_CODE (XEXP (x
,1)) == NOT
)
21374 + rtx_cost (XEXP (x
, 0), mode
, outer_code
, opno
, speed
)
21375 + rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
21376 outer_code
, opno
, speed
);
21380 *total
= cost
->add
;
21384 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
21386 /* (not (xor ...)) can be a single insn in AVX512. */
21387 if (GET_CODE (XEXP (x
, 0)) == XOR
&& TARGET_AVX512F
21388 && (GET_MODE_SIZE (mode
) == 64
21389 || (TARGET_AVX512VL
21390 && (GET_MODE_SIZE (mode
) == 32
21391 || GET_MODE_SIZE (mode
) == 16))))
21393 *total
= ix86_vec_cost (mode
, cost
->sse_op
)
21394 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
21395 outer_code
, opno
, speed
)
21396 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
,
21397 outer_code
, opno
, speed
);
21401 // vnot is pxor -1.
21402 *total
= ix86_vec_cost (mode
, cost
->sse_op
) + 1;
21404 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
21405 *total
= cost
->add
* 2;
21407 *total
= cost
->add
;
21411 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode
))
21412 *total
= cost
->sse_op
;
21413 else if (X87_FLOAT_MODE_P (mode
))
21414 *total
= cost
->fchs
;
21415 else if (FLOAT_MODE_P (mode
))
21416 *total
= ix86_vec_cost (mode
, cost
->sse_op
);
21417 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
21418 *total
= ix86_vec_cost (mode
, cost
->sse_op
);
21419 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
21420 *total
= cost
->add
* 3;
21422 *total
= cost
->add
;
21429 if (GET_CODE (op0
) == ZERO_EXTRACT
21430 && XEXP (op0
, 1) == const1_rtx
21431 && CONST_INT_P (XEXP (op0
, 2))
21432 && op1
== const0_rtx
)
21434 /* This kind of construct is implemented using test[bwl].
21435 Treat it as if we had an AND. */
21436 mode
= GET_MODE (XEXP (op0
, 0));
21437 *total
= (cost
->add
21438 + rtx_cost (XEXP (op0
, 0), mode
, outer_code
,
21440 + rtx_cost (const1_rtx
, mode
, outer_code
, opno
, speed
));
21444 if (GET_CODE (op0
) == PLUS
&& rtx_equal_p (XEXP (op0
, 0), op1
))
21446 /* This is an overflow detection, count it as a normal compare. */
21447 *total
= rtx_cost (op0
, GET_MODE (op0
), COMPARE
, 0, speed
);
21453 (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
21454 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */
21455 if (mode
== CCCmode
21456 && GET_CODE (op0
) == NEG
21457 && GET_CODE (geu
= XEXP (op0
, 0)) == GEU
21458 && REG_P (XEXP (geu
, 0))
21459 && (GET_MODE (XEXP (geu
, 0)) == CCCmode
21460 || GET_MODE (XEXP (geu
, 0)) == CCmode
)
21461 && REGNO (XEXP (geu
, 0)) == FLAGS_REG
21462 && XEXP (geu
, 1) == const0_rtx
21463 && GET_CODE (op1
) == LTU
21464 && REG_P (XEXP (op1
, 0))
21465 && GET_MODE (XEXP (op1
, 0)) == GET_MODE (XEXP (geu
, 0))
21466 && REGNO (XEXP (op1
, 0)) == FLAGS_REG
21467 && XEXP (op1
, 1) == const0_rtx
)
21469 /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */
21474 (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
21475 (geu:QI (reg:CCC FLAGS_REG) (const_int 0))) */
21476 if (mode
== CCCmode
21477 && GET_CODE (op0
) == NEG
21478 && GET_CODE (XEXP (op0
, 0)) == LTU
21479 && REG_P (XEXP (XEXP (op0
, 0), 0))
21480 && GET_MODE (XEXP (XEXP (op0
, 0), 0)) == CCCmode
21481 && REGNO (XEXP (XEXP (op0
, 0), 0)) == FLAGS_REG
21482 && XEXP (XEXP (op0
, 0), 1) == const0_rtx
21483 && GET_CODE (op1
) == GEU
21484 && REG_P (XEXP (op1
, 0))
21485 && GET_MODE (XEXP (op1
, 0)) == CCCmode
21486 && REGNO (XEXP (op1
, 0)) == FLAGS_REG
21487 && XEXP (op1
, 1) == const0_rtx
)
21489 /* This is *x86_cmc. */
21491 *total
= COSTS_N_BYTES (1);
21492 else if (TARGET_SLOW_STC
)
21493 *total
= COSTS_N_INSNS (2);
21495 *total
= COSTS_N_INSNS (1);
21499 if (SCALAR_INT_MODE_P (GET_MODE (op0
))
21500 && GET_MODE_SIZE (GET_MODE (op0
)) > UNITS_PER_WORD
)
21502 if (op1
== const0_rtx
)
21504 + rtx_cost (op0
, GET_MODE (op0
), outer_code
, opno
, speed
);
21506 *total
= 3*cost
->add
21507 + rtx_cost (op0
, GET_MODE (op0
), outer_code
, opno
, speed
)
21508 + rtx_cost (op1
, GET_MODE (op0
), outer_code
, opno
, speed
);
21512 /* The embedded comparison operand is completely free. */
21513 if (!general_operand (op0
, GET_MODE (op0
)) && op1
== const0_rtx
)
21519 if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode
))
21522 *total
= ix86_vec_cost (mode
, cost
->addss
);
21525 case FLOAT_TRUNCATE
:
21526 if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode
))
21527 *total
= cost
->fadd
;
21529 *total
= ix86_vec_cost (mode
, cost
->addss
);
21533 /* SSE requires memory load for the constant operand. It may make
21534 sense to account for this. Of course the constant operand may or
21535 may not be reused. */
21536 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode
))
21537 *total
= cost
->sse_op
;
21538 else if (X87_FLOAT_MODE_P (mode
))
21539 *total
= cost
->fabs
;
21540 else if (FLOAT_MODE_P (mode
))
21541 *total
= ix86_vec_cost (mode
, cost
->sse_op
);
21545 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode
))
21546 *total
= mode
== SFmode
? cost
->sqrtss
: cost
->sqrtsd
;
21547 else if (X87_FLOAT_MODE_P (mode
))
21548 *total
= cost
->fsqrt
;
21549 else if (FLOAT_MODE_P (mode
))
21550 *total
= ix86_vec_cost (mode
,
21551 mode
== SFmode
? cost
->sqrtss
: cost
->sqrtsd
);
21555 if (XINT (x
, 1) == UNSPEC_TP
)
21557 else if (XINT (x
, 1) == UNSPEC_VTERNLOG
)
21559 *total
= cost
->sse_op
;
21562 else if (XINT (x
, 1) == UNSPEC_PTEST
)
21564 *total
= cost
->sse_op
;
21565 rtx test_op0
= XVECEXP (x
, 0, 0);
21566 if (!rtx_equal_p (test_op0
, XVECEXP (x
, 0, 1)))
21568 if (GET_CODE (test_op0
) == AND
)
21570 rtx and_op0
= XEXP (test_op0
, 0);
21571 if (GET_CODE (and_op0
) == NOT
)
21572 and_op0
= XEXP (and_op0
, 0);
21573 *total
+= rtx_cost (and_op0
, GET_MODE (and_op0
),
21575 + rtx_cost (XEXP (test_op0
, 1), GET_MODE (and_op0
),
21579 *total
= rtx_cost (test_op0
, GET_MODE (test_op0
),
21587 case VEC_DUPLICATE
:
21588 /* ??? Assume all of these vector manipulation patterns are
21589 recognizable. In which case they all pretty much have the
21591 *total
= cost
->sse_op
;
21594 mask
= XEXP (x
, 2);
21595 /* This is masked instruction, assume the same cost,
21596 as nonmasked variant. */
21597 if (TARGET_AVX512F
&& register_operand (mask
, GET_MODE (mask
)))
21598 *total
= rtx_cost (XEXP (x
, 0), mode
, outer_code
, opno
, speed
);
21600 *total
= cost
->sse_op
;
21604 /* An insn that accesses memory is slightly more expensive
21605 than one that does not. */
21611 if (XEXP (x
, 1) == const1_rtx
21612 && GET_CODE (XEXP (x
, 2)) == ZERO_EXTEND
21613 && GET_MODE (XEXP (x
, 2)) == SImode
21614 && GET_MODE (XEXP (XEXP (x
, 2), 0)) == QImode
)
21616 /* Ignore cost of zero extension and masking of last argument. */
21617 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
21618 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
21619 *total
+= rtx_cost (XEXP (XEXP (x
, 2), 0), mode
, code
, 2, speed
);
21626 && VECTOR_MODE_P (mode
)
21627 && (GET_MODE_SIZE (mode
) == 16 || GET_MODE_SIZE (mode
) == 32))
21630 *total
= speed
? COSTS_N_INSNS (2) : COSTS_N_BYTES (6);
21631 if (!REG_P (XEXP (x
, 0)))
21632 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
21633 if (!REG_P (XEXP (x
, 1)))
21634 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
21635 if (!REG_P (XEXP (x
, 2)))
21636 *total
+= rtx_cost (XEXP (x
, 2), mode
, code
, 2, speed
);
21639 else if (TARGET_CMOVE
21640 && SCALAR_INT_MODE_P (mode
)
21641 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
)
21644 *total
= COSTS_N_INSNS (1);
21645 if (!REG_P (XEXP (x
, 0)))
21646 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
21647 if (!REG_P (XEXP (x
, 1)))
21648 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
21649 if (!REG_P (XEXP (x
, 2)))
21650 *total
+= rtx_cost (XEXP (x
, 2), mode
, code
, 2, speed
);
21662 static int current_machopic_label_num
;
21664 /* Given a symbol name and its associated stub, write out the
21665 definition of the stub. */
21668 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
21670 unsigned int length
;
21671 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
21672 int label
= ++current_machopic_label_num
;
21674 /* For 64-bit we shouldn't get here. */
21675 gcc_assert (!TARGET_64BIT
);
21677 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
21678 symb
= targetm
.strip_name_encoding (symb
);
21680 length
= strlen (stub
);
21681 binder_name
= XALLOCAVEC (char, length
+ 32);
21682 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
21684 length
= strlen (symb
);
21685 symbol_name
= XALLOCAVEC (char, length
+ 32);
21686 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
21688 sprintf (lazy_ptr_name
, "L%d$lz", label
);
21690 if (MACHOPIC_ATT_STUB
)
21691 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
21692 else if (MACHOPIC_PURE
)
21693 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
21695 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
21697 fprintf (file
, "%s:\n", stub
);
21698 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
21700 if (MACHOPIC_ATT_STUB
)
21702 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
21704 else if (MACHOPIC_PURE
)
21707 /* 25-byte PIC stub using "CALL get_pc_thunk". */
21708 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
21709 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
21710 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
21711 label
, lazy_ptr_name
, label
);
21712 fprintf (file
, "\tjmp\t*%%ecx\n");
21715 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
21717 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
21718 it needs no stub-binding-helper. */
21719 if (MACHOPIC_ATT_STUB
)
21722 fprintf (file
, "%s:\n", binder_name
);
21726 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
21727 fprintf (file
, "\tpushl\t%%ecx\n");
21730 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
21732 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
21734 /* N.B. Keep the correspondence of these
21735 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
21736 old-pic/new-pic/non-pic stubs; altering this will break
21737 compatibility with existing dylibs. */
21740 /* 25-byte PIC stub using "CALL get_pc_thunk". */
21741 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
21744 /* 16-byte -mdynamic-no-pic stub. */
21745 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
21747 fprintf (file
, "%s:\n", lazy_ptr_name
);
21748 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
21749 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
21751 #endif /* TARGET_MACHO */
21753 /* Order the registers for register allocator. */
21756 x86_order_regs_for_local_alloc (void)
21761 /* First allocate the local general purpose registers. */
21762 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
21763 if (GENERAL_REGNO_P (i
) && call_used_or_fixed_reg_p (i
))
21764 reg_alloc_order
[pos
++] = i
;
21766 /* Global general purpose registers. */
21767 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
21768 if (GENERAL_REGNO_P (i
) && !call_used_or_fixed_reg_p (i
))
21769 reg_alloc_order
[pos
++] = i
;
21771 /* x87 registers come first in case we are doing FP math
21773 if (!TARGET_SSE_MATH
)
21774 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
21775 reg_alloc_order
[pos
++] = i
;
21777 /* SSE registers. */
21778 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
21779 reg_alloc_order
[pos
++] = i
;
21780 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
21781 reg_alloc_order
[pos
++] = i
;
21783 /* Extended REX SSE registers. */
21784 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
21785 reg_alloc_order
[pos
++] = i
;
21787 /* Mask register. */
21788 for (i
= FIRST_MASK_REG
; i
<= LAST_MASK_REG
; i
++)
21789 reg_alloc_order
[pos
++] = i
;
21791 /* x87 registers. */
21792 if (TARGET_SSE_MATH
)
21793 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
21794 reg_alloc_order
[pos
++] = i
;
21796 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
21797 reg_alloc_order
[pos
++] = i
;
21799 /* Initialize the rest of array as we do not allocate some registers
21801 while (pos
< FIRST_PSEUDO_REGISTER
)
21802 reg_alloc_order
[pos
++] = 0;
21806 ix86_ms_bitfield_layout_p (const_tree record_type
)
21808 return ((TARGET_MS_BITFIELD_LAYOUT
21809 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
21810 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
21813 /* Returns an expression indicating where the this parameter is
21814 located on entry to the FUNCTION. */
21817 x86_this_parameter (tree function
)
21819 tree type
= TREE_TYPE (function
);
21820 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
21825 const int *parm_regs
;
21827 if (ix86_function_type_abi (type
) == MS_ABI
)
21828 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
21830 parm_regs
= x86_64_int_parameter_registers
;
21831 return gen_rtx_REG (Pmode
, parm_regs
[aggr
]);
21834 nregs
= ix86_function_regparm (type
, function
);
21836 if (nregs
> 0 && !stdarg_p (type
))
21839 unsigned int ccvt
= ix86_get_callcvt (type
);
21841 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
21842 regno
= aggr
? DX_REG
: CX_REG
;
21843 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
21847 return gen_rtx_MEM (SImode
,
21848 plus_constant (Pmode
, stack_pointer_rtx
, 4));
21857 return gen_rtx_MEM (SImode
,
21858 plus_constant (Pmode
,
21859 stack_pointer_rtx
, 4));
21862 return gen_rtx_REG (SImode
, regno
);
21865 return gen_rtx_MEM (SImode
, plus_constant (Pmode
, stack_pointer_rtx
,
21869 /* Determine whether x86_output_mi_thunk can succeed. */
21872 x86_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
21873 const_tree function
)
21875 /* 64-bit can handle anything. */
21879 /* For 32-bit, everything's fine if we have one free register. */
21880 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
21883 /* Need a free register for vcall_offset. */
21887 /* Need a free register for GOT references. */
21888 if (flag_pic
&& !targetm
.binds_local_p (function
))
21891 /* Otherwise ok. */
21895 /* Output the assembler code for a thunk function. THUNK_DECL is the
21896 declaration for the thunk function itself, FUNCTION is the decl for
21897 the target function. DELTA is an immediate constant offset to be
21898 added to THIS. If VCALL_OFFSET is nonzero, the word at
21899 *(*this + vcall_offset) should be added to THIS. */
21902 x86_output_mi_thunk (FILE *file
, tree thunk_fndecl
, HOST_WIDE_INT delta
,
21903 HOST_WIDE_INT vcall_offset
, tree function
)
21905 const char *fnname
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl
));
21906 rtx this_param
= x86_this_parameter (function
);
21907 rtx this_reg
, tmp
, fnaddr
;
21908 unsigned int tmp_regno
;
21910 int saved_flag_force_indirect_call
= flag_force_indirect_call
;
21913 tmp_regno
= R10_REG
;
21916 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
21917 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
21918 tmp_regno
= AX_REG
;
21919 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
21920 tmp_regno
= DX_REG
;
21922 tmp_regno
= CX_REG
;
21925 flag_force_indirect_call
= 0;
21928 emit_note (NOTE_INSN_PROLOGUE_END
);
21930 /* CET is enabled, insert EB instruction. */
21931 if ((flag_cf_protection
& CF_BRANCH
))
21932 emit_insn (gen_nop_endbr ());
21934 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
21935 pull it in now and let DELTA benefit. */
21936 if (REG_P (this_param
))
21937 this_reg
= this_param
;
21938 else if (vcall_offset
)
21940 /* Put the this parameter into %eax. */
21941 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
21942 emit_move_insn (this_reg
, this_param
);
21945 this_reg
= NULL_RTX
;
21947 /* Adjust the this parameter by a fixed constant. */
21950 rtx delta_rtx
= GEN_INT (delta
);
21951 rtx delta_dst
= this_reg
? this_reg
: this_param
;
21955 if (!x86_64_general_operand (delta_rtx
, Pmode
))
21957 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
21958 emit_move_insn (tmp
, delta_rtx
);
21963 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
21966 /* Adjust the this parameter by a value stored in the vtable. */
21969 rtx vcall_addr
, vcall_mem
, this_mem
;
21971 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
21973 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
21974 if (Pmode
!= ptr_mode
)
21975 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
21976 emit_move_insn (tmp
, this_mem
);
21978 /* Adjust the this parameter. */
21979 vcall_addr
= plus_constant (Pmode
, tmp
, vcall_offset
);
21981 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
21983 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
21984 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
21985 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
21988 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
21989 if (Pmode
!= ptr_mode
)
21990 emit_insn (gen_addsi_1_zext (this_reg
,
21991 gen_rtx_REG (ptr_mode
,
21995 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
21998 /* If necessary, drop THIS back to its stack slot. */
21999 if (this_reg
&& this_reg
!= this_param
)
22000 emit_move_insn (this_param
, this_reg
);
22002 fnaddr
= XEXP (DECL_RTL (function
), 0);
22005 if (!flag_pic
|| targetm
.binds_local_p (function
)
22010 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
22011 tmp
= gen_rtx_CONST (Pmode
, tmp
);
22012 fnaddr
= gen_const_mem (Pmode
, tmp
);
22017 if (!flag_pic
|| targetm
.binds_local_p (function
))
22020 else if (TARGET_MACHO
)
22022 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
22023 fnaddr
= XEXP (fnaddr
, 0);
22025 #endif /* TARGET_MACHO */
22028 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
22029 output_set_got (tmp
, NULL_RTX
);
22031 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
22032 fnaddr
= gen_rtx_CONST (Pmode
, fnaddr
);
22033 fnaddr
= gen_rtx_PLUS (Pmode
, tmp
, fnaddr
);
22034 fnaddr
= gen_const_mem (Pmode
, fnaddr
);
22038 /* Our sibling call patterns do not allow memories, because we have no
22039 predicate that can distinguish between frame and non-frame memory.
22040 For our purposes here, we can get away with (ab)using a jump pattern,
22041 because we're going to do no optimization. */
22042 if (MEM_P (fnaddr
))
22044 if (sibcall_insn_operand (fnaddr
, word_mode
))
22046 fnaddr
= XEXP (DECL_RTL (function
), 0);
22047 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
22048 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
22049 tmp
= emit_call_insn (tmp
);
22050 SIBLING_CALL_P (tmp
) = 1;
22053 emit_jump_insn (gen_indirect_jump (fnaddr
));
22057 if (ix86_cmodel
== CM_LARGE_PIC
&& SYMBOLIC_CONST (fnaddr
))
22059 // CM_LARGE_PIC always uses pseudo PIC register which is
22060 // uninitialized. Since FUNCTION is local and calling it
22061 // doesn't go through PLT, we use scratch register %r11 as
22062 // PIC register and initialize it here.
22063 pic_offset_table_rtx
= gen_rtx_REG (Pmode
, R11_REG
);
22064 ix86_init_large_pic_reg (tmp_regno
);
22065 fnaddr
= legitimize_pic_address (fnaddr
,
22066 gen_rtx_REG (Pmode
, tmp_regno
));
22069 if (!sibcall_insn_operand (fnaddr
, word_mode
))
22071 tmp
= gen_rtx_REG (word_mode
, tmp_regno
);
22072 if (GET_MODE (fnaddr
) != word_mode
)
22073 fnaddr
= gen_rtx_ZERO_EXTEND (word_mode
, fnaddr
);
22074 emit_move_insn (tmp
, fnaddr
);
22078 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
22079 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
22080 tmp
= emit_call_insn (tmp
);
22081 SIBLING_CALL_P (tmp
) = 1;
22085 /* Emit just enough of rest_of_compilation to get the insns emitted. */
22086 insn
= get_insns ();
22087 shorten_branches (insn
);
22088 assemble_start_function (thunk_fndecl
, fnname
);
22089 final_start_function (insn
, file
, 1);
22090 final (insn
, file
, 1);
22091 final_end_function ();
22092 assemble_end_function (thunk_fndecl
, fnname
);
22094 flag_force_indirect_call
= saved_flag_force_indirect_call
;
22098 x86_file_start (void)
22100 default_file_start ();
22102 fputs ("\t.code16gcc\n", asm_out_file
);
22104 darwin_file_start ();
22106 if (X86_FILE_START_VERSION_DIRECTIVE
)
22107 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
22108 if (X86_FILE_START_FLTUSED
)
22109 fputs ("\t.global\t__fltused\n", asm_out_file
);
22110 if (ix86_asm_dialect
== ASM_INTEL
)
22111 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
22115 x86_field_alignment (tree type
, int computed
)
22119 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
22122 return iamcu_alignment (type
, computed
);
22123 type
= strip_array_types (type
);
22124 mode
= TYPE_MODE (type
);
22125 if (mode
== DFmode
|| mode
== DCmode
22126 || GET_MODE_CLASS (mode
) == MODE_INT
22127 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
22129 if (TYPE_ATOMIC (type
) && computed
> 32)
22131 static bool warned
;
22133 if (!warned
&& warn_psabi
)
22136 = CHANGES_ROOT_URL
"gcc-11/changes.html#ia32_atomic";
22139 inform (input_location
, "the alignment of %<_Atomic %T%> "
22140 "fields changed in %{GCC 11.1%}",
22141 TYPE_MAIN_VARIANT (type
), url
);
22145 return MIN (32, computed
);
22150 /* Print call to TARGET to FILE. */
22153 x86_print_call_or_nop (FILE *file
, const char *target
)
22155 if (flag_nop_mcount
|| !strcmp (target
, "nop"))
22156 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
22157 fprintf (file
, "1:" ASM_BYTE
"0x0f, 0x1f, 0x44, 0x00, 0x00\n");
22159 fprintf (file
, "1:\tcall\t%s\n", target
);
22163 current_fentry_name (const char **name
)
22165 tree attr
= lookup_attribute ("fentry_name",
22166 DECL_ATTRIBUTES (current_function_decl
));
22169 *name
= TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr
)));
22174 current_fentry_section (const char **name
)
22176 tree attr
= lookup_attribute ("fentry_section",
22177 DECL_ATTRIBUTES (current_function_decl
));
22180 *name
= TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr
)));
22184 /* Output assembler code to FILE to increment profiler label # LABELNO
22185 for profiling a function entry. */
22187 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
22189 if (cfun
->machine
->insn_queued_at_entrance
)
22191 if (cfun
->machine
->insn_queued_at_entrance
== TYPE_ENDBR
)
22192 fprintf (file
, "\t%s\n", TARGET_64BIT
? "endbr64" : "endbr32");
22193 unsigned int patch_area_size
22194 = crtl
->patch_area_size
- crtl
->patch_area_entry
;
22195 if (patch_area_size
)
22196 ix86_output_patchable_area (patch_area_size
,
22197 crtl
->patch_area_entry
== 0);
22200 const char *mcount_name
= MCOUNT_NAME
;
22202 if (current_fentry_name (&mcount_name
))
22204 else if (fentry_name
)
22205 mcount_name
= fentry_name
;
22206 else if (flag_fentry
)
22207 mcount_name
= MCOUNT_NAME_BEFORE_PROLOGUE
;
22211 #ifndef NO_PROFILE_COUNTERS
22212 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
22215 if (!TARGET_PECOFF
)
22217 switch (ix86_cmodel
)
22220 /* NB: R10 is caller-saved. Although it can be used as a
22221 static chain register, it is preserved when calling
22222 mcount for nested functions. */
22223 fprintf (file
, "1:\tmovabsq\t$%s, %%r10\n\tcall\t*%%r10\n",
22227 #ifdef NO_PROFILE_COUNTERS
22228 fprintf (file
, "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
22229 fprintf (file
, "\tleaq\t1b(%%rip), %%r10\n");
22230 fprintf (file
, "\taddq\t%%r11, %%r10\n");
22231 fprintf (file
, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name
);
22232 fprintf (file
, "\taddq\t%%r11, %%r10\n");
22233 fprintf (file
, "\tcall\t*%%r10\n");
22235 sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
22239 case CM_MEDIUM_PIC
:
22240 if (!ix86_direct_extern_access
)
22242 fprintf (file
, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
22247 x86_print_call_or_nop (file
, mcount_name
);
22252 x86_print_call_or_nop (file
, mcount_name
);
22256 #ifndef NO_PROFILE_COUNTERS
22257 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
22260 fprintf (file
, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
22264 #ifndef NO_PROFILE_COUNTERS
22265 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
22268 x86_print_call_or_nop (file
, mcount_name
);
22271 if (flag_record_mcount
22272 || lookup_attribute ("fentry_section",
22273 DECL_ATTRIBUTES (current_function_decl
)))
22275 const char *sname
= "__mcount_loc";
22277 if (current_fentry_section (&sname
))
22279 else if (fentry_section
)
22280 sname
= fentry_section
;
22282 fprintf (file
, "\t.section %s, \"a\",@progbits\n", sname
);
22283 fprintf (file
, "\t.%s 1b\n", TARGET_64BIT
? "quad" : "long");
22284 fprintf (file
, "\t.previous\n");
22288 /* We don't have exact information about the insn sizes, but we may assume
22289 quite safely that we are informed about all 1 byte insns and memory
22290 address sizes. This is enough to eliminate unnecessary padding in
22294 ix86_min_insn_size (rtx_insn
*insn
)
22298 if (!INSN_P (insn
) || !active_insn_p (insn
))
22301 /* Discard alignments we've emit and jump instructions. */
22302 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
22303 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
22306 /* Important case - calls are always 5 bytes.
22307 It is common to have many calls in the row. */
22309 && symbolic_reference_mentioned_p (PATTERN (insn
))
22310 && !SIBLING_CALL_P (insn
))
22312 len
= get_attr_length (insn
);
22316 /* For normal instructions we rely on get_attr_length being exact,
22317 with a few exceptions. */
22318 if (!JUMP_P (insn
))
22320 enum attr_type type
= get_attr_type (insn
);
22325 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
22326 || asm_noperands (PATTERN (insn
)) >= 0)
22333 /* Otherwise trust get_attr_length. */
22337 l
= get_attr_length_address (insn
);
22338 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
22347 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
22349 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
22353 ix86_avoid_jump_mispredicts (void)
22355 rtx_insn
*insn
, *start
= get_insns ();
22356 int nbytes
= 0, njumps
= 0;
22357 bool isjump
= false;
22359 /* Look for all minimal intervals of instructions containing 4 jumps.
22360 The intervals are bounded by START and INSN. NBYTES is the total
22361 size of instructions in the interval including INSN and not including
22362 START. When the NBYTES is smaller than 16 bytes, it is possible
22363 that the end of START and INSN ends up in the same 16byte page.
22365 The smallest offset in the page INSN can start is the case where START
22366 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
22367 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
22369 Don't consider asm goto as jump, while it can contain a jump, it doesn't
22370 have to, control transfer to label(s) can be performed through other
22371 means, and also we estimate minimum length of all asm stmts as 0. */
22372 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
22376 if (LABEL_P (insn
))
22378 align_flags alignment
= label_to_alignment (insn
);
22379 int align
= alignment
.levels
[0].log
;
22380 int max_skip
= alignment
.levels
[0].maxskip
;
22384 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
22385 already in the current 16 byte page, because otherwise
22386 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
22387 bytes to reach 16 byte boundary. */
22389 || (align
<= 3 && max_skip
!= (1 << align
) - 1))
22392 fprintf (dump_file
, "Label %i with max_skip %i\n",
22393 INSN_UID (insn
), max_skip
);
22396 while (nbytes
+ max_skip
>= 16)
22398 start
= NEXT_INSN (start
);
22399 if ((JUMP_P (start
) && asm_noperands (PATTERN (start
)) < 0)
22401 njumps
--, isjump
= true;
22404 nbytes
-= ix86_min_insn_size (start
);
22410 min_size
= ix86_min_insn_size (insn
);
22411 nbytes
+= min_size
;
22413 fprintf (dump_file
, "Insn %i estimated to %i bytes\n",
22414 INSN_UID (insn
), min_size
);
22415 if ((JUMP_P (insn
) && asm_noperands (PATTERN (insn
)) < 0)
22423 start
= NEXT_INSN (start
);
22424 if ((JUMP_P (start
) && asm_noperands (PATTERN (start
)) < 0)
22426 njumps
--, isjump
= true;
22429 nbytes
-= ix86_min_insn_size (start
);
22431 gcc_assert (njumps
>= 0);
22433 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
22434 INSN_UID (start
), INSN_UID (insn
), nbytes
);
22436 if (njumps
== 3 && isjump
&& nbytes
< 16)
22438 int padsize
= 15 - nbytes
+ ix86_min_insn_size (insn
);
22441 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
22442 INSN_UID (insn
), padsize
);
22443 emit_insn_before (gen_pad (GEN_INT (padsize
)), insn
);
22449 /* AMD Athlon works faster
22450 when RET is not destination of conditional jump or directly preceded
22451 by other jump instruction. We avoid the penalty by inserting NOP just
22452 before the RET instructions in such cases. */
22454 ix86_pad_returns (void)
22459 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
22461 basic_block bb
= e
->src
;
22462 rtx_insn
*ret
= BB_END (bb
);
22464 bool replace
= false;
22466 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
22467 || optimize_bb_for_size_p (bb
))
22469 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
22470 if (active_insn_p (prev
) || LABEL_P (prev
))
22472 if (prev
&& LABEL_P (prev
))
22477 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
22478 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
22479 && !(e
->flags
& EDGE_FALLTHRU
))
22487 prev
= prev_active_insn (ret
);
22489 && ((JUMP_P (prev
) && any_condjump_p (prev
))
22492 /* Empty functions get branch mispredict even when
22493 the jump destination is not visible to us. */
22494 if (!prev
&& !optimize_function_for_size_p (cfun
))
22499 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
22505 /* Count the minimum number of instructions in BB. Return 4 if the
22506 number of instructions >= 4. */
22509 ix86_count_insn_bb (basic_block bb
)
22512 int insn_count
= 0;
22514 /* Count number of instructions in this block. Return 4 if the number
22515 of instructions >= 4. */
22516 FOR_BB_INSNS (bb
, insn
)
22518 /* Only happen in exit blocks. */
22520 && ANY_RETURN_P (PATTERN (insn
)))
22523 if (NONDEBUG_INSN_P (insn
)
22524 && GET_CODE (PATTERN (insn
)) != USE
22525 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
22528 if (insn_count
>= 4)
22537 /* Count the minimum number of instructions in code path in BB.
22538 Return 4 if the number of instructions >= 4. */
22541 ix86_count_insn (basic_block bb
)
22545 int min_prev_count
;
22547 /* Only bother counting instructions along paths with no
22548 more than 2 basic blocks between entry and exit. Given
22549 that BB has an edge to exit, determine if a predecessor
22550 of BB has an edge from entry. If so, compute the number
22551 of instructions in the predecessor block. If there
22552 happen to be multiple such blocks, compute the minimum. */
22553 min_prev_count
= 4;
22554 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
22557 edge_iterator prev_ei
;
22559 if (e
->src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
))
22561 min_prev_count
= 0;
22564 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
22566 if (prev_e
->src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
))
22568 int count
= ix86_count_insn_bb (e
->src
);
22569 if (count
< min_prev_count
)
22570 min_prev_count
= count
;
22576 if (min_prev_count
< 4)
22577 min_prev_count
+= ix86_count_insn_bb (bb
);
22579 return min_prev_count
;
22582 /* Pad short function to 4 instructions. */
22585 ix86_pad_short_function (void)
22590 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
22592 rtx_insn
*ret
= BB_END (e
->src
);
22593 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
22595 int insn_count
= ix86_count_insn (e
->src
);
22597 /* Pad short function. */
22598 if (insn_count
< 4)
22600 rtx_insn
*insn
= ret
;
22602 /* Find epilogue. */
22605 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
22606 insn
= PREV_INSN (insn
);
22611 /* Two NOPs count as one instruction. */
22612 insn_count
= 2 * (4 - insn_count
);
22613 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
22619 /* Fix up a Windows system unwinder issue. If an EH region falls through into
22620 the epilogue, the Windows system unwinder will apply epilogue logic and
22621 produce incorrect offsets. This can be avoided by adding a nop between
22622 the last insn that can throw and the first insn of the epilogue. */
22625 ix86_seh_fixup_eh_fallthru (void)
22630 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
22632 rtx_insn
*insn
, *next
;
22634 /* Find the beginning of the epilogue. */
22635 for (insn
= BB_END (e
->src
); insn
!= NULL
; insn
= PREV_INSN (insn
))
22636 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_EPILOGUE_BEG
)
22641 /* We only care about preceding insns that can throw. */
22642 insn
= prev_active_insn (insn
);
22643 if (insn
== NULL
|| !can_throw_internal (insn
))
22646 /* Do not separate calls from their debug information. */
22647 for (next
= NEXT_INSN (insn
); next
!= NULL
; next
= NEXT_INSN (next
))
22648 if (NOTE_P (next
) && NOTE_KIND (next
) == NOTE_INSN_VAR_LOCATION
)
22653 emit_insn_after (gen_nops (const1_rtx
), insn
);
22656 /* Split vector load from parm_decl to elemental loads to avoid STLF
22659 ix86_split_stlf_stall_load ()
22661 rtx_insn
* insn
, *start
= get_insns ();
22662 unsigned window
= 0;
22664 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
22666 if (!NONDEBUG_INSN_P (insn
))
22669 /* Insert 64 vaddps %xmm18, %xmm19, %xmm20(no dependence between each
22670 other, just emulate for pipeline) before stalled load, stlf stall
22671 case is as fast as no stall cases on CLX.
22672 Since CFG is freed before machine_reorg, just do a rough
22673 calculation of the window according to the layout. */
22674 if (window
> (unsigned) x86_stlf_window_ninsns
)
22677 if (any_uncondjump_p (insn
)
22678 || ANY_RETURN_P (PATTERN (insn
))
22682 rtx set
= single_set (insn
);
22685 rtx src
= SET_SRC (set
);
22687 /* Only handle V2DFmode load since it doesn't need any scratch
22689 || GET_MODE (src
) != E_V2DFmode
22691 || TREE_CODE (get_base_address (MEM_EXPR (src
))) != PARM_DECL
)
22694 rtx zero
= CONST0_RTX (V2DFmode
);
22695 rtx dest
= SET_DEST (set
);
22696 rtx m
= adjust_address (src
, DFmode
, 0);
22697 rtx loadlpd
= gen_sse2_loadlpd (dest
, zero
, m
);
22698 emit_insn_before (loadlpd
, insn
);
22699 m
= adjust_address (src
, DFmode
, 8);
22700 rtx loadhpd
= gen_sse2_loadhpd (dest
, dest
, m
);
22701 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
22703 fputs ("Due to potential STLF stall, split instruction:\n",
22705 print_rtl_single (dump_file
, insn
);
22706 fputs ("To:\n", dump_file
);
22707 print_rtl_single (dump_file
, loadlpd
);
22708 print_rtl_single (dump_file
, loadhpd
);
22710 PATTERN (insn
) = loadhpd
;
22711 INSN_CODE (insn
) = -1;
22712 gcc_assert (recog_memoized (insn
) != -1);
22716 /* Implement machine specific optimizations. We implement padding of returns
22717 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
22721 /* We are freeing block_for_insn in the toplev to keep compatibility
22722 with old MDEP_REORGS that are not CFG based. Recompute it now. */
22723 compute_bb_for_insn ();
22725 if (TARGET_SEH
&& current_function_has_exception_handlers ())
22726 ix86_seh_fixup_eh_fallthru ();
22728 if (optimize
&& optimize_function_for_speed_p (cfun
))
22731 ix86_split_stlf_stall_load ();
22732 if (TARGET_PAD_SHORT_FUNCTION
)
22733 ix86_pad_short_function ();
22734 else if (TARGET_PAD_RETURNS
)
22735 ix86_pad_returns ();
22736 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
22737 if (TARGET_FOUR_JUMP_LIMIT
)
22738 ix86_avoid_jump_mispredicts ();
22743 /* Return nonzero when QImode register that must be represented via REX prefix
22746 x86_extended_QIreg_mentioned_p (rtx_insn
*insn
)
22749 extract_insn_cached (insn
);
22750 for (i
= 0; i
< recog_data
.n_operands
; i
++)
22751 if (GENERAL_REG_P (recog_data
.operand
[i
])
22752 && !QI_REGNO_P (REGNO (recog_data
.operand
[i
])))
22757 /* Return true when INSN mentions register that must be encoded using REX
22760 x86_extended_reg_mentioned_p (rtx insn
)
22762 subrtx_iterator::array_type array
;
22763 FOR_EACH_SUBRTX (iter
, array
, INSN_P (insn
) ? PATTERN (insn
) : insn
, NONCONST
)
22765 const_rtx x
= *iter
;
22767 && (REX_INT_REGNO_P (REGNO (x
)) || REX_SSE_REGNO_P (REGNO (x
))))
22773 /* If profitable, negate (without causing overflow) integer constant
22774 of mode MODE at location LOC. Return true in this case. */
22776 x86_maybe_negate_const_int (rtx
*loc
, machine_mode mode
)
22780 if (!CONST_INT_P (*loc
))
22786 /* DImode x86_64 constants must fit in 32 bits. */
22787 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
22798 gcc_unreachable ();
22801 /* Avoid overflows. */
22802 if (mode_signbit_p (mode
, *loc
))
22805 val
= INTVAL (*loc
);
22807 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
22808 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
22809 if ((val
< 0 && val
!= -128)
22812 *loc
= GEN_INT (-val
);
22819 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
22820 optabs would emit if we didn't have TFmode patterns. */
22823 x86_emit_floatuns (rtx operands
[2])
22825 rtx_code_label
*neglab
, *donelab
;
22826 rtx i0
, i1
, f0
, in
, out
;
22827 machine_mode mode
, inmode
;
22829 inmode
= GET_MODE (operands
[1]);
22830 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
22833 in
= force_reg (inmode
, operands
[1]);
22834 mode
= GET_MODE (out
);
22835 neglab
= gen_label_rtx ();
22836 donelab
= gen_label_rtx ();
22837 f0
= gen_reg_rtx (mode
);
22839 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
22841 expand_float (out
, in
, 0);
22843 emit_jump_insn (gen_jump (donelab
));
22846 emit_label (neglab
);
22848 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
22850 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
22852 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
22854 expand_float (f0
, i0
, 0);
22856 emit_insn (gen_rtx_SET (out
, gen_rtx_PLUS (mode
, f0
, f0
)));
22858 emit_label (donelab
);
22861 /* Return the diagnostic message string if conversion from FROMTYPE to
22862 TOTYPE is not allowed, NULL otherwise. */
22864 static const char *
22865 ix86_invalid_conversion (const_tree fromtype
, const_tree totype
)
22867 machine_mode from_mode
= element_mode (fromtype
);
22868 machine_mode to_mode
= element_mode (totype
);
22870 if (!TARGET_SSE2
&& from_mode
!= to_mode
)
22872 /* Do no allow conversions to/from BFmode/HFmode scalar types
22873 when TARGET_SSE2 is not available. */
22874 if (from_mode
== BFmode
)
22875 return N_("invalid conversion from type %<__bf16%> "
22876 "without option %<-msse2%>");
22877 if (from_mode
== HFmode
)
22878 return N_("invalid conversion from type %<_Float16%> "
22879 "without option %<-msse2%>");
22880 if (to_mode
== BFmode
)
22881 return N_("invalid conversion to type %<__bf16%> "
22882 "without option %<-msse2%>");
22883 if (to_mode
== HFmode
)
22884 return N_("invalid conversion to type %<_Float16%> "
22885 "without option %<-msse2%>");
22888 /* Warn for silent implicit conversion between __bf16 and short,
22889 since __bfloat16 is refined as real __bf16 instead of short
22891 if (element_mode (fromtype
) != element_mode (totype
)
22892 && (TARGET_AVX512BF16
|| TARGET_AVXNECONVERT
))
22894 /* Warn for silent implicit conversion where user may expect
22896 if ((TYPE_MODE (fromtype
) == BFmode
22897 && TYPE_MODE (totype
) == HImode
)
22898 || (TYPE_MODE (totype
) == BFmode
22899 && TYPE_MODE (fromtype
) == HImode
))
22900 warning (0, "%<__bfloat16%> is redefined from typedef %<short%> "
22901 "to real %<__bf16%> since GCC 13.1, be careful of "
22902 "implicit conversion between %<__bf16%> and %<short%>; "
22903 "an explicit bitcast may be needed here");
22906 /* Conversion allowed. */
22910 /* Return the diagnostic message string if the unary operation OP is
22911 not permitted on TYPE, NULL otherwise. */
22913 static const char *
22914 ix86_invalid_unary_op (int op
, const_tree type
)
22916 machine_mode mmode
= element_mode (type
);
22917 /* Reject all single-operand operations on BFmode/HFmode except for &
22918 when TARGET_SSE2 is not available. */
22919 if (!TARGET_SSE2
&& op
!= ADDR_EXPR
)
22921 if (mmode
== BFmode
)
22922 return N_("operation not permitted on type %<__bf16%> "
22923 "without option %<-msse2%>");
22924 if (mmode
== HFmode
)
22925 return N_("operation not permitted on type %<_Float16%> "
22926 "without option %<-msse2%>");
22929 /* Operation allowed. */
22933 /* Return the diagnostic message string if the binary operation OP is
22934 not permitted on TYPE1 and TYPE2, NULL otherwise. */
22936 static const char *
22937 ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED
, const_tree type1
,
22940 machine_mode type1_mode
= element_mode (type1
);
22941 machine_mode type2_mode
= element_mode (type2
);
22942 /* Reject all 2-operand operations on BFmode or HFmode
22943 when TARGET_SSE2 is not available. */
22946 if (type1_mode
== BFmode
|| type2_mode
== BFmode
)
22947 return N_("operation not permitted on type %<__bf16%> "
22948 "without option %<-msse2%>");
22950 if (type1_mode
== HFmode
|| type2_mode
== HFmode
)
22951 return N_("operation not permitted on type %<_Float16%> "
22952 "without option %<-msse2%>");
22955 /* Operation allowed. */
22960 /* Target hook for scalar_mode_supported_p. */
22962 ix86_scalar_mode_supported_p (scalar_mode mode
)
22964 if (DECIMAL_FLOAT_MODE_P (mode
))
22965 return default_decimal_float_supported_p ();
22966 else if (mode
== TFmode
)
22968 else if (mode
== HFmode
|| mode
== BFmode
)
22971 return default_scalar_mode_supported_p (mode
);
22974 /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
22975 if MODE is HFmode, and punt to the generic implementation otherwise. */
22978 ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode
)
22980 /* NB: Always return TRUE for HFmode so that the _Float16 type will
22981 be defined by the C front-end for AVX512FP16 intrinsics. We will
22982 issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
22984 return ((mode
== HFmode
|| mode
== BFmode
)
22986 : default_libgcc_floating_mode_supported_p (mode
));
22989 /* Implements target hook vector_mode_supported_p. */
22991 ix86_vector_mode_supported_p (machine_mode mode
)
22993 /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
22995 if (!TARGET_64BIT
&& GET_MODE_INNER (mode
) == TImode
)
22997 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
22999 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
23001 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
23003 if (TARGET_AVX512F
&& VALID_AVX512F_REG_MODE (mode
))
23005 if ((TARGET_MMX
|| TARGET_MMX_WITH_SSE
)
23006 && VALID_MMX_REG_MODE (mode
))
23008 if ((TARGET_3DNOW
|| TARGET_MMX_WITH_SSE
)
23009 && VALID_MMX_REG_MODE_3DNOW (mode
))
23011 if (mode
== V2QImode
)
23016 /* Target hook for c_mode_for_suffix. */
23017 static machine_mode
23018 ix86_c_mode_for_suffix (char suffix
)
23028 /* Worker function for TARGET_MD_ASM_ADJUST.
23030 We implement asm flag outputs, and maintain source compatibility
23031 with the old cc0-based compiler. */
23034 ix86_md_asm_adjust (vec
<rtx
> &outputs
, vec
<rtx
> & /*inputs*/,
23035 vec
<machine_mode
> & /*input_modes*/,
23036 vec
<const char *> &constraints
, vec
<rtx
> &clobbers
,
23037 HARD_REG_SET
&clobbered_regs
, location_t loc
)
23039 bool saw_asm_flag
= false;
23042 for (unsigned i
= 0, n
= outputs
.length (); i
< n
; ++i
)
23044 const char *con
= constraints
[i
];
23045 if (!startswith (con
, "=@cc"))
23048 if (strchr (con
, ',') != NULL
)
23050 error_at (loc
, "alternatives not allowed in %<asm%> flag output");
23054 bool invert
= false;
23056 invert
= true, con
++;
23058 machine_mode mode
= CCmode
;
23059 rtx_code code
= UNKNOWN
;
23065 mode
= CCAmode
, code
= EQ
;
23066 else if (con
[1] == 'e' && con
[2] == 0)
23067 mode
= CCCmode
, code
= NE
;
23071 mode
= CCCmode
, code
= EQ
;
23072 else if (con
[1] == 'e' && con
[2] == 0)
23073 mode
= CCAmode
, code
= NE
;
23077 mode
= CCCmode
, code
= EQ
;
23081 mode
= CCZmode
, code
= EQ
;
23085 mode
= CCGCmode
, code
= GT
;
23086 else if (con
[1] == 'e' && con
[2] == 0)
23087 mode
= CCGCmode
, code
= GE
;
23091 mode
= CCGCmode
, code
= LT
;
23092 else if (con
[1] == 'e' && con
[2] == 0)
23093 mode
= CCGCmode
, code
= LE
;
23097 mode
= CCOmode
, code
= EQ
;
23101 mode
= CCPmode
, code
= EQ
;
23105 mode
= CCSmode
, code
= EQ
;
23109 mode
= CCZmode
, code
= EQ
;
23112 if (code
== UNKNOWN
)
23114 error_at (loc
, "unknown %<asm%> flag output %qs", constraints
[i
]);
23118 code
= reverse_condition (code
);
23120 rtx dest
= outputs
[i
];
23123 /* This is the first asm flag output. Here we put the flags
23124 register in as the real output and adjust the condition to
23126 constraints
[i
] = "=Bf";
23127 outputs
[i
] = gen_rtx_REG (CCmode
, FLAGS_REG
);
23128 saw_asm_flag
= true;
23132 /* We don't need the flags register as output twice. */
23133 constraints
[i
] = "=X";
23134 outputs
[i
] = gen_rtx_SCRATCH (SImode
);
23137 rtx x
= gen_rtx_REG (mode
, FLAGS_REG
);
23138 x
= gen_rtx_fmt_ee (code
, QImode
, x
, const0_rtx
);
23140 machine_mode dest_mode
= GET_MODE (dest
);
23141 if (!SCALAR_INT_MODE_P (dest_mode
))
23143 error_at (loc
, "invalid type for %<asm%> flag output");
23147 if (dest_mode
== QImode
)
23148 emit_insn (gen_rtx_SET (dest
, x
));
23151 rtx reg
= gen_reg_rtx (QImode
);
23152 emit_insn (gen_rtx_SET (reg
, x
));
23154 reg
= convert_to_mode (dest_mode
, reg
, 1);
23155 emit_move_insn (dest
, reg
);
23159 rtx_insn
*seq
= get_insns ();
23166 /* If we had no asm flag outputs, clobber the flags. */
23167 clobbers
.safe_push (gen_rtx_REG (CCmode
, FLAGS_REG
));
23168 SET_HARD_REG_BIT (clobbered_regs
, FLAGS_REG
);
23173 /* Implements target vector targetm.asm.encode_section_info. */
23175 static void ATTRIBUTE_UNUSED
23176 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
23178 default_encode_section_info (decl
, rtl
, first
);
23180 if (ix86_in_large_data_p (decl
))
23181 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
23184 /* Worker function for REVERSE_CONDITION. */
23187 ix86_reverse_condition (enum rtx_code code
, machine_mode mode
)
23189 return (mode
== CCFPmode
23190 ? reverse_condition_maybe_unordered (code
)
23191 : reverse_condition (code
));
23194 /* Output code to perform an x87 FP register move, from OPERANDS[1]
23198 output_387_reg_move (rtx_insn
*insn
, rtx
*operands
)
23200 if (REG_P (operands
[0]))
23202 if (REG_P (operands
[1])
23203 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
23205 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
23206 return output_387_ffreep (operands
, 0);
23207 return "fstp\t%y0";
23209 if (STACK_TOP_P (operands
[0]))
23210 return "fld%Z1\t%y1";
23213 else if (MEM_P (operands
[0]))
23215 gcc_assert (REG_P (operands
[1]));
23216 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
23217 return "fstp%Z0\t%y0";
23220 /* There is no non-popping store to memory for XFmode.
23221 So if we need one, follow the store with a load. */
23222 if (GET_MODE (operands
[0]) == XFmode
)
23223 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
23225 return "fst%Z0\t%y0";
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#ifndef USE_GAS
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
     SPARC assembler.  One cannot mix single-letter flags and #exclude, so
     only emit the latter here.  */
  if (flags & SECTION_EXCLUDE)
    {
      fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
23270 /* Return the mangling of TYPE if it is an extended fundamental type. */
23272 static const char *
23273 ix86_mangle_type (const_tree type
)
23275 type
= TYPE_MAIN_VARIANT (type
);
23277 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
23278 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
23281 if (type
== float128_type_node
|| type
== float64x_type_node
)
23284 switch (TYPE_MODE (type
))
23289 /* _Float16 is "DF16_".
23290 Align with clang's decision in https://reviews.llvm.org/D33719. */
23293 /* __float128 is "g". */
23296 /* "long double" or __float80 is "e". */
23303 /* Create C++ tinfo symbols for only conditionally available fundamental
23307 ix86_emit_support_tinfos (emit_support_tinfos_callback callback
)
23309 extern tree ix86_float16_type_node
;
23310 extern tree ix86_bf16_type_node
;
23314 if (!float16_type_node
)
23315 float16_type_node
= ix86_float16_type_node
;
23316 if (!bfloat16_type_node
)
23317 bfloat16_type_node
= ix86_bf16_type_node
;
23318 callback (float16_type_node
);
23319 callback (bfloat16_type_node
);
23320 float16_type_node
= NULL_TREE
;
23321 bfloat16_type_node
= NULL_TREE
;
23325 static GTY(()) tree ix86_tls_stack_chk_guard_decl
;
23328 ix86_stack_protect_guard (void)
23330 if (TARGET_SSP_TLS_GUARD
)
23332 tree type_node
= lang_hooks
.types
.type_for_mode (ptr_mode
, 1);
23333 int qual
= ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg
);
23334 tree type
= build_qualified_type (type_node
, qual
);
23337 if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str
))
23339 t
= ix86_tls_stack_chk_guard_decl
;
23346 (UNKNOWN_LOCATION
, VAR_DECL
,
23347 get_identifier (ix86_stack_protector_guard_symbol_str
),
23349 TREE_STATIC (t
) = 1;
23350 TREE_PUBLIC (t
) = 1;
23351 DECL_EXTERNAL (t
) = 1;
23353 TREE_THIS_VOLATILE (t
) = 1;
23354 DECL_ARTIFICIAL (t
) = 1;
23355 DECL_IGNORED_P (t
) = 1;
23357 /* Do not share RTL as the declaration is visible outside of
23358 current function. */
23360 RTX_FLAG (x
, used
) = 1;
23362 ix86_tls_stack_chk_guard_decl
= t
;
23367 tree asptrtype
= build_pointer_type (type
);
23369 t
= build_int_cst (asptrtype
, ix86_stack_protector_guard_offset
);
23370 t
= build2 (MEM_REF
, asptrtype
, t
,
23371 build_int_cst (asptrtype
, 0));
23372 TREE_THIS_VOLATILE (t
) = 1;
23378 return default_stack_protect_guard ();
23381 /* For 32-bit code we can save PIC register setup by using
23382 __stack_chk_fail_local hidden function instead of calling
23383 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
23384 register, so it is better to call __stack_chk_fail directly. */
23386 static tree ATTRIBUTE_UNUSED
23387 ix86_stack_protect_fail (void)
23389 return TARGET_64BIT
23390 ? default_external_stack_protect_fail ()
23391 : default_hidden_stack_protect_fail ();
23394 /* Select a format to encode pointers in exception handling data. CODE
23395 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
23396 true if the symbol may be affected by dynamic relocations.
23398 ??? All x86 object file formats are capable of representing this.
23399 After all, the relocation needed is the same as for the call insn.
23400 Whether or not a particular assembler allows us to enter such, I
23401 guess we'll have to see. */
23404 asm_preferred_eh_data_format (int code
, int global
)
23406 /* PE-COFF is effectively always -fPIC because of the .reloc section. */
23407 if (flag_pic
|| TARGET_PECOFF
|| !ix86_direct_extern_access
)
23409 int type
= DW_EH_PE_sdata8
;
23410 if (ptr_mode
== SImode
23411 || ix86_cmodel
== CM_SMALL_PIC
23412 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
23413 type
= DW_EH_PE_sdata4
;
23414 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
23417 if (ix86_cmodel
== CM_SMALL
23418 || (ix86_cmodel
== CM_MEDIUM
&& code
))
23419 return DW_EH_PE_udata4
;
23421 return DW_EH_PE_absptr
;
23424 /* Implement targetm.vectorize.builtin_vectorization_cost. */
23426 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
23430 machine_mode mode
= TImode
;
23432 if (vectype
!= NULL
)
23434 fp
= FLOAT_TYPE_P (vectype
);
23435 mode
= TYPE_MODE (vectype
);
23438 switch (type_of_cost
)
23441 return fp
? ix86_cost
->addss
: COSTS_N_INSNS (1);
23444 /* load/store costs are relative to register move which is 2. Recompute
23445 it to COSTS_N_INSNS so everything have same base. */
23446 return COSTS_N_INSNS (fp
? ix86_cost
->sse_load
[0]
23447 : ix86_cost
->int_load
[2]) / 2;
23450 return COSTS_N_INSNS (fp
? ix86_cost
->sse_store
[0]
23451 : ix86_cost
->int_store
[2]) / 2;
23454 return ix86_vec_cost (mode
,
23455 fp
? ix86_cost
->addss
: ix86_cost
->sse_op
);
23458 index
= sse_store_index (mode
);
23459 /* See PR82713 - we may end up being called on non-vector type. */
23462 return COSTS_N_INSNS (ix86_cost
->sse_load
[index
]) / 2;
23465 index
= sse_store_index (mode
);
23466 /* See PR82713 - we may end up being called on non-vector type. */
23469 return COSTS_N_INSNS (ix86_cost
->sse_store
[index
]) / 2;
23471 case vec_to_scalar
:
23472 case scalar_to_vec
:
23473 return ix86_vec_cost (mode
, ix86_cost
->sse_op
);
23475 /* We should have separate costs for unaligned loads and gather/scatter.
23476 Do that incrementally. */
23477 case unaligned_load
:
23478 index
= sse_store_index (mode
);
23479 /* See PR82713 - we may end up being called on non-vector type. */
23482 return COSTS_N_INSNS (ix86_cost
->sse_unaligned_load
[index
]) / 2;
23484 case unaligned_store
:
23485 index
= sse_store_index (mode
);
23486 /* See PR82713 - we may end up being called on non-vector type. */
23489 return COSTS_N_INSNS (ix86_cost
->sse_unaligned_store
[index
]) / 2;
23491 case vector_gather_load
:
23492 return ix86_vec_cost (mode
,
23494 (ix86_cost
->gather_static
23495 + ix86_cost
->gather_per_elt
23496 * TYPE_VECTOR_SUBPARTS (vectype
)) / 2);
23498 case vector_scatter_store
:
23499 return ix86_vec_cost (mode
,
23501 (ix86_cost
->scatter_static
23502 + ix86_cost
->scatter_per_elt
23503 * TYPE_VECTOR_SUBPARTS (vectype
)) / 2);
23505 case cond_branch_taken
:
23506 return ix86_cost
->cond_taken_branch_cost
;
23508 case cond_branch_not_taken
:
23509 return ix86_cost
->cond_not_taken_branch_cost
;
23512 case vec_promote_demote
:
23513 return ix86_vec_cost (mode
, ix86_cost
->sse_op
);
23515 case vec_construct
:
23517 int n
= TYPE_VECTOR_SUBPARTS (vectype
);
23518 /* N - 1 element inserts into an SSE vector, the possible
23519 GPR -> XMM move is accounted for in add_stmt_cost. */
23520 if (GET_MODE_BITSIZE (mode
) <= 128)
23521 return (n
- 1) * ix86_cost
->sse_op
;
23522 /* One vinserti128 for combining two SSE vectors for AVX256. */
23523 else if (GET_MODE_BITSIZE (mode
) == 256)
23524 return ((n
- 2) * ix86_cost
->sse_op
23525 + ix86_vec_cost (mode
, ix86_cost
->addss
));
23526 /* One vinserti64x4 and two vinserti128 for combining SSE
23527 and AVX256 vectors to AVX512. */
23528 else if (GET_MODE_BITSIZE (mode
) == 512)
23529 return ((n
- 4) * ix86_cost
->sse_op
23530 + 3 * ix86_vec_cost (mode
, ix86_cost
->addss
));
23531 gcc_unreachable ();
23535 gcc_unreachable ();
23540 /* This function returns the calling abi specific va_list type node.
23541 It returns the FNDECL specific va_list type. */
23544 ix86_fn_abi_va_list (tree fndecl
)
23547 return va_list_type_node
;
23548 gcc_assert (fndecl
!= NULL_TREE
);
23550 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
23551 return ms_va_list_type_node
;
23553 return sysv_va_list_type_node
;
23556 /* Returns the canonical va_list type specified by TYPE. If there
23557 is no valid TYPE provided, it return NULL_TREE. */
23560 ix86_canonical_va_list_type (tree type
)
23564 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type
)))
23565 return ms_va_list_type_node
;
23567 if ((TREE_CODE (type
) == ARRAY_TYPE
23568 && integer_zerop (array_type_nelts (type
)))
23569 || POINTER_TYPE_P (type
))
23571 tree elem_type
= TREE_TYPE (type
);
23572 if (TREE_CODE (elem_type
) == RECORD_TYPE
23573 && lookup_attribute ("sysv_abi va_list",
23574 TYPE_ATTRIBUTES (elem_type
)))
23575 return sysv_va_list_type_node
;
23581 return std_canonical_va_list_type (type
);
23584 /* Iterate through the target-specific builtin types for va_list.
23585 IDX denotes the iterator, *PTREE is set to the result type of
23586 the va_list builtin, and *PNAME to its internal type.
23587 Returns zero if there is no element for this index, otherwise
23588 IDX should be increased upon the next call.
23589 Note, do not iterate a base builtin's name like __builtin_va_list.
23590 Used from c_common_nodes_and_builtins. */
23593 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
23603 *ptree
= ms_va_list_type_node
;
23604 *pname
= "__builtin_ms_va_list";
23608 *ptree
= sysv_va_list_type_node
;
23609 *pname
= "__builtin_sysv_va_list";
/* Install the i386-specific implementations of the generic scheduler
   target hooks.  */
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_atom_sched_reorder
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  ix86_dependencies_evaluation_hook
23632 /* Implementation of reassociation_width target hook used by
23633 reassoc phase to identify parallelism level in reassociated
23634 tree. Statements tree_code is passed in OPC. Arguments type
23635 is passed in MODE. */
23638 ix86_reassociation_width (unsigned int op
, machine_mode mode
)
23642 if (VECTOR_MODE_P (mode
))
23645 if (INTEGRAL_MODE_P (mode
))
23646 width
= ix86_cost
->reassoc_vec_int
;
23647 else if (FLOAT_MODE_P (mode
))
23648 width
= ix86_cost
->reassoc_vec_fp
;
23653 /* Integer vector instructions execute in FP unit
23654 and can execute 3 additions and one multiplication per cycle. */
23655 if ((ix86_tune
== PROCESSOR_ZNVER1
|| ix86_tune
== PROCESSOR_ZNVER2
23656 || ix86_tune
== PROCESSOR_ZNVER3
|| ix86_tune
== PROCESSOR_ZNVER4
)
23657 && INTEGRAL_MODE_P (mode
) && op
!= PLUS
&& op
!= MINUS
)
23660 /* Account for targets that splits wide vectors into multiple parts. */
23661 if (TARGET_AVX512_SPLIT_REGS
&& GET_MODE_BITSIZE (mode
) > 256)
23662 div
= GET_MODE_BITSIZE (mode
) / 256;
23663 else if (TARGET_AVX256_SPLIT_REGS
&& GET_MODE_BITSIZE (mode
) > 128)
23664 div
= GET_MODE_BITSIZE (mode
) / 128;
23665 else if (TARGET_SSE_SPLIT_REGS
&& GET_MODE_BITSIZE (mode
) > 64)
23666 div
= GET_MODE_BITSIZE (mode
) / 64;
23667 width
= (width
+ div
- 1) / div
;
23670 else if (INTEGRAL_MODE_P (mode
))
23671 width
= ix86_cost
->reassoc_int
;
23672 else if (FLOAT_MODE_P (mode
))
23673 width
= ix86_cost
->reassoc_fp
;
23675 /* Avoid using too many registers in 32bit mode. */
23676 if (!TARGET_64BIT
&& width
> 2)
23681 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
23682 place emms and femms instructions. */
23684 static machine_mode
23685 ix86_preferred_simd_mode (scalar_mode mode
)
23693 if (TARGET_AVX512BW
&& !TARGET_PREFER_AVX256
)
23695 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
23701 if (TARGET_AVX512BW
&& !TARGET_PREFER_AVX256
)
23703 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
23709 if (TARGET_AVX512F
&& !TARGET_PREFER_AVX256
)
23711 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
23717 if (TARGET_AVX512F
&& !TARGET_PREFER_AVX256
)
23719 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
23725 if (TARGET_AVX512FP16
)
23727 if (TARGET_AVX512VL
)
23729 if (TARGET_PREFER_AVX128
)
23731 else if (TARGET_PREFER_AVX256
)
23739 if (TARGET_AVX512F
&& !TARGET_PREFER_AVX256
)
23741 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
23747 if (TARGET_AVX512F
&& !TARGET_PREFER_AVX256
)
23749 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
23751 else if (TARGET_SSE2
)
23760 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
23761 vectors. If AVX512F is enabled then try vectorizing with 512bit,
23762 256bit and 128bit vectors. */
23764 static unsigned int
23765 ix86_autovectorize_vector_modes (vector_modes
*modes
, bool all
)
23767 if (TARGET_AVX512F
&& !TARGET_PREFER_AVX256
)
23769 modes
->safe_push (V64QImode
);
23770 modes
->safe_push (V32QImode
);
23771 modes
->safe_push (V16QImode
);
23773 else if (TARGET_AVX512F
&& all
)
23775 modes
->safe_push (V32QImode
);
23776 modes
->safe_push (V16QImode
);
23777 modes
->safe_push (V64QImode
);
23779 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
23781 modes
->safe_push (V32QImode
);
23782 modes
->safe_push (V16QImode
);
23784 else if (TARGET_AVX
&& all
)
23786 modes
->safe_push (V16QImode
);
23787 modes
->safe_push (V32QImode
);
23789 else if (TARGET_SSE2
)
23790 modes
->safe_push (V16QImode
);
23792 if (TARGET_MMX_WITH_SSE
)
23793 modes
->safe_push (V8QImode
);
23796 modes
->safe_push (V4QImode
);
23801 /* Implemenation of targetm.vectorize.get_mask_mode. */
23803 static opt_machine_mode
23804 ix86_get_mask_mode (machine_mode data_mode
)
23806 unsigned vector_size
= GET_MODE_SIZE (data_mode
);
23807 unsigned nunits
= GET_MODE_NUNITS (data_mode
);
23808 unsigned elem_size
= vector_size
/ nunits
;
23810 /* Scalar mask case. */
23811 if ((TARGET_AVX512F
&& vector_size
== 64)
23812 || (TARGET_AVX512VL
&& (vector_size
== 32 || vector_size
== 16)))
23816 || (TARGET_AVX512BW
&& (elem_size
== 1 || elem_size
== 2)))
23817 return smallest_int_mode_for_size (nunits
);
23820 scalar_int_mode elem_mode
23821 = smallest_int_mode_for_size (elem_size
* BITS_PER_UNIT
);
23823 gcc_assert (elem_size
* nunits
== vector_size
);
23825 return mode_for_vector (elem_mode
, nunits
);
23830 /* Return class of registers which could be used for pseudo of MODE
23831 and of class RCLASS for spilling instead of memory. Return NO_REGS
23832 if it is not possible or non-profitable. */
23834 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
23837 ix86_spill_class (reg_class_t rclass
, machine_mode mode
)
23839 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
23841 && TARGET_INTER_UNIT_MOVES_TO_VEC
23842 && TARGET_INTER_UNIT_MOVES_FROM_VEC
23843 && (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
))
23844 && INTEGER_CLASS_P (rclass
))
23845 return ALL_SSE_REGS
;
23849 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
23850 but returns a lower bound. */
23852 static unsigned int
23853 ix86_max_noce_ifcvt_seq_cost (edge e
)
23855 bool predictable_p
= predictable_edge_p (e
);
23858 if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost
))
23859 return param_max_rtl_if_conversion_predictable_cost
;
23863 if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost
))
23864 return param_max_rtl_if_conversion_unpredictable_cost
;
23867 return BRANCH_COST (true, predictable_p
) * COSTS_N_INSNS (2);
23870 /* Return true if SEQ is a good candidate as a replacement for the
23871 if-convertible sequence described in IF_INFO. */
23874 ix86_noce_conversion_profitable_p (rtx_insn
*seq
, struct noce_if_info
*if_info
)
23876 if (TARGET_ONE_IF_CONV_INSN
&& if_info
->speed_p
)
23879 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
23880 Maybe we should allow even more conditional moves as long as they
23881 are used far enough not to stall the CPU, or also consider
23882 IF_INFO->TEST_BB succ edge probabilities. */
23883 for (rtx_insn
*insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
23885 rtx set
= single_set (insn
);
23888 if (GET_CODE (SET_SRC (set
)) != IF_THEN_ELSE
)
23890 rtx src
= SET_SRC (set
);
23891 machine_mode mode
= GET_MODE (src
);
23892 if (GET_MODE_CLASS (mode
) != MODE_INT
23893 && GET_MODE_CLASS (mode
) != MODE_FLOAT
)
23895 if ((!REG_P (XEXP (src
, 1)) && !MEM_P (XEXP (src
, 1)))
23896 || (!REG_P (XEXP (src
, 2)) && !MEM_P (XEXP (src
, 2))))
23898 /* insn is CMOV or FCMOV. */
23899 if (++cmov_cnt
> 1)
23903 return default_noce_conversion_profitable_p (seq
, if_info
);
23906 /* x86-specific vector costs. */
23907 class ix86_vector_costs
: public vector_costs
23909 using vector_costs::vector_costs
;
23911 unsigned int add_stmt_cost (int count
, vect_cost_for_stmt kind
,
23912 stmt_vec_info stmt_info
, slp_tree node
,
23913 tree vectype
, int misalign
,
23914 vect_cost_model_location where
) override
;
23915 void finish_cost (const vector_costs
*) override
;
23918 /* Implement targetm.vectorize.create_costs. */
23920 static vector_costs
*
23921 ix86_vectorize_create_costs (vec_info
*vinfo
, bool costing_for_scalar
)
23923 return new ix86_vector_costs (vinfo
, costing_for_scalar
);
23927 ix86_vector_costs::add_stmt_cost (int count
, vect_cost_for_stmt kind
,
23928 stmt_vec_info stmt_info
, slp_tree node
,
23929 tree vectype
, int misalign
,
23930 vect_cost_model_location where
)
23932 unsigned retval
= 0;
23934 = (kind
== scalar_stmt
|| kind
== scalar_load
|| kind
== scalar_store
);
23935 int stmt_cost
= - 1;
23938 machine_mode mode
= scalar_p
? SImode
: TImode
;
23940 if (vectype
!= NULL
)
23942 fp
= FLOAT_TYPE_P (vectype
);
23943 mode
= TYPE_MODE (vectype
);
23945 mode
= TYPE_MODE (TREE_TYPE (vectype
));
23948 if ((kind
== vector_stmt
|| kind
== scalar_stmt
)
23950 && stmt_info
->stmt
&& gimple_code (stmt_info
->stmt
) == GIMPLE_ASSIGN
)
23952 tree_code subcode
= gimple_assign_rhs_code (stmt_info
->stmt
);
23953 /*machine_mode inner_mode = mode;
23954 if (VECTOR_MODE_P (mode))
23955 inner_mode = GET_MODE_INNER (mode);*/
23960 case POINTER_PLUS_EXPR
:
23962 if (kind
== scalar_stmt
)
23964 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode
))
23965 stmt_cost
= ix86_cost
->addss
;
23966 else if (X87_FLOAT_MODE_P (mode
))
23967 stmt_cost
= ix86_cost
->fadd
;
23969 stmt_cost
= ix86_cost
->add
;
23972 stmt_cost
= ix86_vec_cost (mode
, fp
? ix86_cost
->addss
23973 : ix86_cost
->sse_op
);
23977 /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
23978 take it as MULT_EXPR. */
23979 case MULT_HIGHPART_EXPR
:
23980 stmt_cost
= ix86_multiplication_cost (ix86_cost
, mode
);
23982 /* There's no direct instruction for WIDEN_MULT_EXPR,
23983 take emulation into account. */
23984 case WIDEN_MULT_EXPR
:
23985 stmt_cost
= ix86_widen_mult_cost (ix86_cost
, mode
,
23986 TYPE_UNSIGNED (vectype
));
23990 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode
))
23991 stmt_cost
= ix86_cost
->sse_op
;
23992 else if (X87_FLOAT_MODE_P (mode
))
23993 stmt_cost
= ix86_cost
->fchs
;
23994 else if (VECTOR_MODE_P (mode
))
23995 stmt_cost
= ix86_vec_cost (mode
, ix86_cost
->sse_op
);
23997 stmt_cost
= ix86_cost
->add
;
23999 case TRUNC_DIV_EXPR
:
24000 case CEIL_DIV_EXPR
:
24001 case FLOOR_DIV_EXPR
:
24002 case ROUND_DIV_EXPR
:
24003 case TRUNC_MOD_EXPR
:
24004 case CEIL_MOD_EXPR
:
24005 case FLOOR_MOD_EXPR
:
24007 case ROUND_MOD_EXPR
:
24008 case EXACT_DIV_EXPR
:
24009 stmt_cost
= ix86_division_cost (ix86_cost
, mode
);
24017 tree op1
= gimple_assign_rhs1 (stmt_info
->stmt
);
24018 tree op2
= gimple_assign_rhs2 (stmt_info
->stmt
);
24019 stmt_cost
= ix86_shift_rotate_cost
24021 (subcode
== RSHIFT_EXPR
24022 && !TYPE_UNSIGNED (TREE_TYPE (op1
)))
24023 ? ASHIFTRT
: LSHIFTRT
, mode
,
24024 TREE_CODE (op2
) == INTEGER_CST
,
24025 cst_and_fits_in_hwi (op2
)
24026 ? int_cst_value (op2
) : -1,
24027 false, false, NULL
, NULL
);
24031 /* Only sign-conversions are free. */
24032 if (tree_nop_conversion_p
24033 (TREE_TYPE (gimple_assign_lhs (stmt_info
->stmt
)),
24034 TREE_TYPE (gimple_assign_rhs1 (stmt_info
->stmt
))))
24046 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode
))
24047 stmt_cost
= ix86_cost
->sse_op
;
24048 else if (VECTOR_MODE_P (mode
))
24049 stmt_cost
= ix86_vec_cost (mode
, ix86_cost
->sse_op
);
24051 stmt_cost
= ix86_cost
->add
;
24059 if ((kind
== vector_stmt
|| kind
== scalar_stmt
)
24062 && (cfn
= gimple_call_combined_fn (stmt_info
->stmt
)) != CFN_LAST
)
24066 stmt_cost
= ix86_vec_cost (mode
,
24067 mode
== SFmode
? ix86_cost
->fmass
24068 : ix86_cost
->fmasd
);
24071 stmt_cost
= ix86_multiplication_cost (ix86_cost
, mode
);
24077 /* If we do elementwise loads into a vector then we are bound by
24078 latency and execution resources for the many scalar loads
24079 (AGU and load ports). Try to account for this by scaling the
24080 construction cost by the number of elements involved. */
24081 if ((kind
== vec_construct
|| kind
== vec_to_scalar
)
24083 && (STMT_VINFO_TYPE (stmt_info
) == load_vec_info_type
24084 || STMT_VINFO_TYPE (stmt_info
) == store_vec_info_type
)
24085 && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) == VMAT_ELEMENTWISE
24086 && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info
)))
24088 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) == VMAT_GATHER_SCATTER
))
24090 stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
24091 stmt_cost
*= (TYPE_VECTOR_SUBPARTS (vectype
) + 1);
24093 else if ((kind
== vec_construct
|| kind
== scalar_to_vec
)
24095 && SLP_TREE_DEF_TYPE (node
) == vect_external_def
24096 && INTEGRAL_TYPE_P (TREE_TYPE (vectype
)))
24098 stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
24101 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
24102 if (TREE_CODE (op
) == SSA_NAME
)
24103 TREE_VISITED (op
) = 0;
24104 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
24106 if (TREE_CODE (op
) != SSA_NAME
24107 || TREE_VISITED (op
))
24109 TREE_VISITED (op
) = 1;
24110 gimple
*def
= SSA_NAME_DEF_STMT (op
);
24112 if (is_gimple_assign (def
)
24113 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def
))
24114 && ((tem
= gimple_assign_rhs1 (def
)), true)
24115 && TREE_CODE (tem
) == SSA_NAME
24116 /* A sign-change expands to nothing. */
24117 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (def
)),
24119 def
= SSA_NAME_DEF_STMT (tem
);
24120 /* When the component is loaded from memory we can directly
24121 move it to a vector register, otherwise we have to go
24122 via a GPR or via vpinsr which involves similar cost.
24123 Likewise with a BIT_FIELD_REF extracting from a vector
24124 register we can hope to avoid using a GPR. */
24125 if (!is_gimple_assign (def
)
24126 || ((!gimple_assign_load_p (def
)
24128 && GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op
))) == 1))
24129 && (gimple_assign_rhs_code (def
) != BIT_FIELD_REF
24130 || !VECTOR_TYPE_P (TREE_TYPE
24131 (TREE_OPERAND (gimple_assign_rhs1 (def
), 0))))))
24132 stmt_cost
+= ix86_cost
->sse_to_integer
;
24134 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
24135 if (TREE_CODE (op
) == SSA_NAME
)
24136 TREE_VISITED (op
) = 0;
24138 if (stmt_cost
== -1)
24139 stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
24141 /* Penalize DFmode vector operations for Bonnell. */
24142 if (TARGET_CPU_P (BONNELL
) && kind
== vector_stmt
24143 && vectype
&& GET_MODE_INNER (TYPE_MODE (vectype
)) == DFmode
)
24144 stmt_cost
*= 5; /* FIXME: The value here is arbitrary. */
24146 /* Statements in an inner loop relative to the loop being
24147 vectorized are weighted more heavily. The value here is
24148 arbitrary and could potentially be improved with analysis. */
24149 retval
= adjust_cost_for_freq (stmt_info
, where
, count
* stmt_cost
);
24151 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
24152 for Silvermont as it has out of order integer pipeline and can execute
24153 2 scalar instruction per tick, but has in order SIMD pipeline. */
24154 if ((TARGET_CPU_P (SILVERMONT
) || TARGET_CPU_P (GOLDMONT
)
24155 || TARGET_CPU_P (GOLDMONT_PLUS
) || TARGET_CPU_P (INTEL
))
24156 && stmt_info
&& stmt_info
->stmt
)
24158 tree lhs_op
= gimple_get_lhs (stmt_info
->stmt
);
24159 if (lhs_op
&& TREE_CODE (TREE_TYPE (lhs_op
)) == INTEGER_TYPE
)
24160 retval
= (retval
* 17) / 10;
24163 m_costs
[where
] += retval
;
24169 ix86_vector_costs::finish_cost (const vector_costs
*scalar_costs
)
24171 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (m_vinfo
);
24172 if (loop_vinfo
&& !m_costing_for_scalar
)
24174 /* We are currently not asking the vectorizer to compare costs
24175 between different vector mode sizes. When using predication
24176 that will end up always choosing the prefered mode size even
24177 if there's a smaller mode covering all lanes. Test for this
24178 situation and artificially reject the larger mode attempt.
24179 ??? We currently lack masked ops for sub-SSE sized modes,
24180 so we could restrict this rejection to AVX and AVX512 modes
24181 but error on the safe side for now. */
24182 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo
)
24183 && !LOOP_VINFO_EPILOGUE_P (loop_vinfo
)
24184 && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
24185 && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
).to_constant ())
24186 > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo
))))
24187 m_costs
[vect_body
] = INT_MAX
;
24190 vector_costs::finish_cost (scalar_costs
);
24193 /* Validate target specific memory model bits in VAL. */
24195 static unsigned HOST_WIDE_INT
24196 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
24198 enum memmodel model
= memmodel_from_int (val
);
24201 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
24203 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
24205 warning (OPT_Winvalid_memory_model
,
24206 "unknown architecture specific memory model");
24207 return MEMMODEL_SEQ_CST
;
24209 strong
= (is_mm_acq_rel (model
) || is_mm_seq_cst (model
));
24210 if (val
& IX86_HLE_ACQUIRE
&& !(is_mm_acquire (model
) || strong
))
24212 warning (OPT_Winvalid_memory_model
,
24213 "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
24215 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
24217 if (val
& IX86_HLE_RELEASE
&& !(is_mm_release (model
) || strong
))
24219 warning (OPT_Winvalid_memory_model
,
24220 "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
24222 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
24227 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
24228 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
24229 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
24230 or number of vecsize_mangle variants that should be emitted. */
24233 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node
*node
,
24234 struct cgraph_simd_clone
*clonei
,
24235 tree base_type
, int num
,
24240 if (clonei
->simdlen
24241 && (clonei
->simdlen
< 2
24242 || clonei
->simdlen
> 1024
24243 || (clonei
->simdlen
& (clonei
->simdlen
- 1)) != 0))
24246 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
24247 "unsupported simdlen %wd", clonei
->simdlen
.to_constant ());
24251 tree ret_type
= TREE_TYPE (TREE_TYPE (node
->decl
));
24252 if (TREE_CODE (ret_type
) != VOID_TYPE
)
24253 switch (TYPE_MODE (ret_type
))
24261 /* case E_SCmode: */
24262 /* case E_DCmode: */
24263 if (!AGGREGATE_TYPE_P (ret_type
))
24268 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
24269 "unsupported return type %qT for simd", ret_type
);
24275 tree type_arg_types
= TYPE_ARG_TYPES (TREE_TYPE (node
->decl
));
24276 bool decl_arg_p
= (node
->definition
|| type_arg_types
== NULL_TREE
);
24278 for (t
= (decl_arg_p
? DECL_ARGUMENTS (node
->decl
) : type_arg_types
), i
= 0;
24279 t
&& t
!= void_list_node
; t
= TREE_CHAIN (t
), i
++)
24281 tree arg_type
= decl_arg_p
? TREE_TYPE (t
) : TREE_VALUE (t
);
24282 switch (TYPE_MODE (arg_type
))
24290 /* case E_SCmode: */
24291 /* case E_DCmode: */
24292 if (!AGGREGATE_TYPE_P (arg_type
))
24296 if (clonei
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_UNIFORM
)
24299 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
24300 "unsupported argument type %qT for simd", arg_type
);
24305 if (!TREE_PUBLIC (node
->decl
) || !explicit_p
)
24307 /* If the function isn't exported, we can pick up just one ISA
24309 if (TARGET_AVX512F
)
24310 clonei
->vecsize_mangle
= 'e';
24311 else if (TARGET_AVX2
)
24312 clonei
->vecsize_mangle
= 'd';
24313 else if (TARGET_AVX
)
24314 clonei
->vecsize_mangle
= 'c';
24316 clonei
->vecsize_mangle
= 'b';
24321 clonei
->vecsize_mangle
= "bcde"[num
];
24324 clonei
->mask_mode
= VOIDmode
;
24325 switch (clonei
->vecsize_mangle
)
24328 clonei
->vecsize_int
= 128;
24329 clonei
->vecsize_float
= 128;
24332 clonei
->vecsize_int
= 128;
24333 clonei
->vecsize_float
= 256;
24336 clonei
->vecsize_int
= 256;
24337 clonei
->vecsize_float
= 256;
24340 clonei
->vecsize_int
= 512;
24341 clonei
->vecsize_float
= 512;
24342 if (TYPE_MODE (base_type
) == QImode
)
24343 clonei
->mask_mode
= DImode
;
24345 clonei
->mask_mode
= SImode
;
24348 if (clonei
->simdlen
== 0)
24350 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type
)))
24351 clonei
->simdlen
= clonei
->vecsize_int
;
24353 clonei
->simdlen
= clonei
->vecsize_float
;
24354 clonei
->simdlen
= clonei
->simdlen
24355 / GET_MODE_BITSIZE (TYPE_MODE (base_type
));
24357 else if (clonei
->simdlen
> 16)
24359 /* For compatibility with ICC, use the same upper bounds
24360 for simdlen. In particular, for CTYPE below, use the return type,
24361 unless the function returns void, in that case use the characteristic
24362 type. If it is possible for given SIMDLEN to pass CTYPE value
24363 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
24364 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
24365 emit corresponding clone. */
24366 tree ctype
= ret_type
;
24367 if (VOID_TYPE_P (ret_type
))
24369 int cnt
= GET_MODE_BITSIZE (TYPE_MODE (ctype
)) * clonei
->simdlen
;
24370 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype
)))
24371 cnt
/= clonei
->vecsize_int
;
24373 cnt
/= clonei
->vecsize_float
;
24374 if (cnt
> (TARGET_64BIT
? 16 : 8))
24377 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
24378 "unsupported simdlen %wd",
24379 clonei
->simdlen
.to_constant ());
24386 /* If SIMD clone NODE can't be used in a vectorized loop
24387 in current function, return -1, otherwise return a badness of using it
24388 (0 if it is most desirable from vecsize_mangle point of view, 1
24389 slightly less desirable, etc.). */
24392 ix86_simd_clone_usable (struct cgraph_node
*node
)
24394 switch (node
->simdclone
->vecsize_mangle
)
24401 return TARGET_AVX512F
? 3 : TARGET_AVX2
? 2 : 1;
24405 return TARGET_AVX512F
? 2 : TARGET_AVX2
? 1 : 0;
24409 return TARGET_AVX512F
? 1 : 0;
24411 if (!TARGET_AVX512F
)
24415 gcc_unreachable ();
24419 /* This function adjusts the unroll factor based on
24420 the hardware capabilities. For ex, bdver3 has
24421 a loop buffer which makes unrolling of smaller
24422 loops less important. This function decides the
24423 unroll factor using number of memory references
24424 (value 32 is used) as a heuristic. */
24427 ix86_loop_unroll_adjust (unsigned nunroll
, class loop
*loop
)
24432 unsigned mem_count
= 0;
24434 /* Unroll small size loop when unroll factor is not explicitly
24436 if (ix86_unroll_only_small_loops
&& !loop
->unroll
)
24438 if (loop
->ninsns
<= ix86_cost
->small_unroll_ninsns
)
24439 return MIN (nunroll
, ix86_cost
->small_unroll_factor
);
24444 if (!TARGET_ADJUST_UNROLL
)
24447 /* Count the number of memory references within the loop body.
24448 This value determines the unrolling factor for bdver3 and bdver4
24450 subrtx_iterator::array_type array
;
24451 bbs
= get_loop_body (loop
);
24452 for (i
= 0; i
< loop
->num_nodes
; i
++)
24453 FOR_BB_INSNS (bbs
[i
], insn
)
24454 if (NONDEBUG_INSN_P (insn
))
24455 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), NONCONST
)
24456 if (const_rtx x
= *iter
)
24459 machine_mode mode
= GET_MODE (x
);
24460 unsigned int n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
24468 if (mem_count
&& mem_count
<=32)
24469 return MIN (nunroll
, 32 / mem_count
);
24475 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
24478 ix86_float_exceptions_rounding_supported_p (void)
24480 /* For x87 floating point with standard excess precision handling,
24481 there is no adddf3 pattern (since x87 floating point only has
24482 XFmode operations) so the default hook implementation gets this
24484 return TARGET_80387
|| (TARGET_SSE
&& TARGET_SSE_MATH
);
24487 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
24490 ix86_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
24492 if (!TARGET_80387
&& !(TARGET_SSE
&& TARGET_SSE_MATH
))
24494 tree exceptions_var
= create_tmp_var_raw (integer_type_node
);
24497 tree fenv_index_type
= build_index_type (size_int (6));
24498 tree fenv_type
= build_array_type (unsigned_type_node
, fenv_index_type
);
24499 tree fenv_var
= create_tmp_var_raw (fenv_type
);
24500 TREE_ADDRESSABLE (fenv_var
) = 1;
24501 tree fenv_ptr
= build_pointer_type (fenv_type
);
24502 tree fenv_addr
= build1 (ADDR_EXPR
, fenv_ptr
, fenv_var
);
24503 fenv_addr
= fold_convert (ptr_type_node
, fenv_addr
);
24504 tree fnstenv
= get_ix86_builtin (IX86_BUILTIN_FNSTENV
);
24505 tree fldenv
= get_ix86_builtin (IX86_BUILTIN_FLDENV
);
24506 tree fnstsw
= get_ix86_builtin (IX86_BUILTIN_FNSTSW
);
24507 tree fnclex
= get_ix86_builtin (IX86_BUILTIN_FNCLEX
);
24508 tree hold_fnstenv
= build_call_expr (fnstenv
, 1, fenv_addr
);
24509 tree hold_fnclex
= build_call_expr (fnclex
, 0);
24510 fenv_var
= build4 (TARGET_EXPR
, fenv_type
, fenv_var
, hold_fnstenv
,
24511 NULL_TREE
, NULL_TREE
);
24512 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, fenv_var
,
24514 *clear
= build_call_expr (fnclex
, 0);
24515 tree sw_var
= create_tmp_var_raw (short_unsigned_type_node
);
24516 tree fnstsw_call
= build_call_expr (fnstsw
, 0);
24517 tree sw_mod
= build4 (TARGET_EXPR
, short_unsigned_type_node
, sw_var
,
24518 fnstsw_call
, NULL_TREE
, NULL_TREE
);
24519 tree exceptions_x87
= fold_convert (integer_type_node
, sw_var
);
24520 tree update_mod
= build4 (TARGET_EXPR
, integer_type_node
,
24521 exceptions_var
, exceptions_x87
,
24522 NULL_TREE
, NULL_TREE
);
24523 *update
= build2 (COMPOUND_EXPR
, integer_type_node
,
24524 sw_mod
, update_mod
);
24525 tree update_fldenv
= build_call_expr (fldenv
, 1, fenv_addr
);
24526 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
, update_fldenv
);
24528 if (TARGET_SSE
&& TARGET_SSE_MATH
)
24530 tree mxcsr_orig_var
= create_tmp_var_raw (unsigned_type_node
);
24531 tree mxcsr_mod_var
= create_tmp_var_raw (unsigned_type_node
);
24532 tree stmxcsr
= get_ix86_builtin (IX86_BUILTIN_STMXCSR
);
24533 tree ldmxcsr
= get_ix86_builtin (IX86_BUILTIN_LDMXCSR
);
24534 tree stmxcsr_hold_call
= build_call_expr (stmxcsr
, 0);
24535 tree hold_assign_orig
= build4 (TARGET_EXPR
, unsigned_type_node
,
24536 mxcsr_orig_var
, stmxcsr_hold_call
,
24537 NULL_TREE
, NULL_TREE
);
24538 tree hold_mod_val
= build2 (BIT_IOR_EXPR
, unsigned_type_node
,
24540 build_int_cst (unsigned_type_node
, 0x1f80));
24541 hold_mod_val
= build2 (BIT_AND_EXPR
, unsigned_type_node
, hold_mod_val
,
24542 build_int_cst (unsigned_type_node
, 0xffffffc0));
24543 tree hold_assign_mod
= build4 (TARGET_EXPR
, unsigned_type_node
,
24544 mxcsr_mod_var
, hold_mod_val
,
24545 NULL_TREE
, NULL_TREE
);
24546 tree ldmxcsr_hold_call
= build_call_expr (ldmxcsr
, 1, mxcsr_mod_var
);
24547 tree hold_all
= build2 (COMPOUND_EXPR
, unsigned_type_node
,
24548 hold_assign_orig
, hold_assign_mod
);
24549 hold_all
= build2 (COMPOUND_EXPR
, void_type_node
, hold_all
,
24550 ldmxcsr_hold_call
);
24552 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, *hold
, hold_all
);
24555 tree ldmxcsr_clear_call
= build_call_expr (ldmxcsr
, 1, mxcsr_mod_var
);
24557 *clear
= build2 (COMPOUND_EXPR
, void_type_node
, *clear
,
24558 ldmxcsr_clear_call
);
24560 *clear
= ldmxcsr_clear_call
;
24561 tree stxmcsr_update_call
= build_call_expr (stmxcsr
, 0);
24562 tree exceptions_sse
= fold_convert (integer_type_node
,
24563 stxmcsr_update_call
);
24566 tree exceptions_mod
= build2 (BIT_IOR_EXPR
, integer_type_node
,
24567 exceptions_var
, exceptions_sse
);
24568 tree exceptions_assign
= build2 (MODIFY_EXPR
, integer_type_node
,
24569 exceptions_var
, exceptions_mod
);
24570 *update
= build2 (COMPOUND_EXPR
, integer_type_node
, *update
,
24571 exceptions_assign
);
24574 *update
= build4 (TARGET_EXPR
, integer_type_node
, exceptions_var
,
24575 exceptions_sse
, NULL_TREE
, NULL_TREE
);
24576 tree ldmxcsr_update_call
= build_call_expr (ldmxcsr
, 1, mxcsr_orig_var
);
24577 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
,
24578 ldmxcsr_update_call
);
24580 tree atomic_feraiseexcept
24581 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT
);
24582 tree atomic_feraiseexcept_call
= build_call_expr (atomic_feraiseexcept
,
24583 1, exceptions_var
);
24584 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
,
24585 atomic_feraiseexcept_call
);
24588 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
24589 /* For i386, common symbol is local only for non-PIE binaries. For
24590 x86-64, common symbol is local only for non-PIE binaries or linker
24591 supports copy reloc in PIE binaries. */
24594 ix86_binds_local_p (const_tree exp
)
24596 bool direct_extern_access
24597 = (ix86_direct_extern_access
24598 && !(VAR_OR_FUNCTION_DECL_P (exp
)
24599 && lookup_attribute ("nodirect_extern_access",
24600 DECL_ATTRIBUTES (exp
))));
24601 if (!direct_extern_access
)
24602 ix86_has_no_direct_extern_access
= true;
24603 return default_binds_local_p_3 (exp
, flag_shlib
!= 0, true,
24604 direct_extern_access
,
24605 (direct_extern_access
24608 && HAVE_LD_PIE_COPYRELOC
!= 0))));
24611 /* If flag_pic or ix86_direct_extern_access is false, then neither
24612 local nor global relocs should be placed in readonly memory. */
24615 ix86_reloc_rw_mask (void)
24617 return (flag_pic
|| !ix86_direct_extern_access
) ? 3 : 0;
24621 /* If MEM is in the form of [base+offset], extract the two parts
24622 of address and set to BASE and OFFSET, otherwise return false. */
24625 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
24629 gcc_assert (MEM_P (mem
));
24631 addr
= XEXP (mem
, 0);
24633 if (GET_CODE (addr
) == CONST
)
24634 addr
= XEXP (addr
, 0);
24636 if (REG_P (addr
) || GET_CODE (addr
) == SYMBOL_REF
)
24639 *offset
= const0_rtx
;
24643 if (GET_CODE (addr
) == PLUS
24644 && (REG_P (XEXP (addr
, 0))
24645 || GET_CODE (XEXP (addr
, 0)) == SYMBOL_REF
)
24646 && CONST_INT_P (XEXP (addr
, 1)))
24648 *base
= XEXP (addr
, 0);
24649 *offset
= XEXP (addr
, 1);
24656 /* Given OPERANDS of consecutive load/store, check if we can merge
24657 them into move multiple. LOAD is true if they are load instructions.
24658 MODE is the mode of memory operands. */
24661 ix86_operands_ok_for_move_multiple (rtx
*operands
, bool load
,
24664 HOST_WIDE_INT offval_1
, offval_2
, msize
;
24665 rtx mem_1
, mem_2
, reg_1
, reg_2
, base_1
, base_2
, offset_1
, offset_2
;
24669 mem_1
= operands
[1];
24670 mem_2
= operands
[3];
24671 reg_1
= operands
[0];
24672 reg_2
= operands
[2];
24676 mem_1
= operands
[0];
24677 mem_2
= operands
[2];
24678 reg_1
= operands
[1];
24679 reg_2
= operands
[3];
24682 gcc_assert (REG_P (reg_1
) && REG_P (reg_2
));
24684 if (REGNO (reg_1
) != REGNO (reg_2
))
24687 /* Check if the addresses are in the form of [base+offset]. */
24688 if (!extract_base_offset_in_addr (mem_1
, &base_1
, &offset_1
))
24690 if (!extract_base_offset_in_addr (mem_2
, &base_2
, &offset_2
))
24693 /* Check if the bases are the same. */
24694 if (!rtx_equal_p (base_1
, base_2
))
24697 offval_1
= INTVAL (offset_1
);
24698 offval_2
= INTVAL (offset_2
);
24699 msize
= GET_MODE_SIZE (mode
);
24700 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
24701 if (offval_1
+ msize
!= offval_2
)
24707 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
24710 ix86_optab_supported_p (int op
, machine_mode mode1
, machine_mode
,
24711 optimization_type opt_type
)
24726 return opt_type
== OPTIMIZE_FOR_SPEED
;
24729 if (SSE_FLOAT_MODE_P (mode1
)
24731 && !flag_trapping_math
24733 && mode1
!= HFmode
)
24734 return opt_type
== OPTIMIZE_FOR_SPEED
;
24740 if (((SSE_FLOAT_MODE_P (mode1
)
24743 || mode1
== HFmode
)
24744 && !flag_trapping_math
)
24746 return opt_type
== OPTIMIZE_FOR_SPEED
;
24749 return opt_type
== OPTIMIZE_FOR_SPEED
&& use_rsqrt_p (mode1
);
24756 /* Address space support.
24758 This is not "far pointers" in the 16-bit sense, but an easy way
24759 to use %fs and %gs segment prefixes. Therefore:
24761 (a) All address spaces have the same modes,
24762 (b) All address spaces have the same addresss forms,
24763 (c) While %fs and %gs are technically subsets of the generic
24764 address space, they are probably not subsets of each other.
24765 (d) Since we have no access to the segment base register values
24766 without resorting to a system call, we cannot convert a
24767 non-default address space to a default address space.
24768 Therefore we do not claim %fs or %gs are subsets of generic.
24770 Therefore we can (mostly) use the default hooks. */
24772 /* All use of segmentation is assumed to make address 0 valid. */
24775 ix86_addr_space_zero_address_valid (addr_space_t as
)
24777 return as
!= ADDR_SPACE_GENERIC
;
24781 ix86_init_libfuncs (void)
24785 set_optab_libfunc (sdivmod_optab
, TImode
, "__divmodti4");
24786 set_optab_libfunc (udivmod_optab
, TImode
, "__udivmodti4");
24790 set_optab_libfunc (sdivmod_optab
, DImode
, "__divmoddi4");
24791 set_optab_libfunc (udivmod_optab
, DImode
, "__udivmoddi4");
24795 darwin_rename_builtins ();
24799 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
24800 FPU, assume that the fpcw is set to extended precision; when using
24801 only SSE, rounding is correct; when using both SSE and the FPU,
24802 the rounding precision is indeterminate, since either may be chosen
24803 apparently at random. */
24805 static enum flt_eval_method
24806 ix86_get_excess_precision (enum excess_precision_type type
)
24810 case EXCESS_PRECISION_TYPE_FAST
:
24811 /* The fastest type to promote to will always be the native type,
24812 whether that occurs with implicit excess precision or
24814 return TARGET_AVX512FP16
24815 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
24816 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
;
24817 case EXCESS_PRECISION_TYPE_STANDARD
:
24818 case EXCESS_PRECISION_TYPE_IMPLICIT
:
24819 /* Otherwise, the excess precision we want when we are
24820 in a standards compliant mode, and the implicit precision we
24821 provide would be identical were it not for the unpredictable
24823 if (TARGET_AVX512FP16
&& TARGET_SSE_MATH
)
24824 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
;
24825 else if (!TARGET_80387
)
24826 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
;
24827 else if (!TARGET_MIX_SSE_I387
)
24829 if (!(TARGET_SSE
&& TARGET_SSE_MATH
))
24830 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE
;
24831 else if (TARGET_SSE2
)
24832 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
;
24835 /* If we are in standards compliant mode, but we know we will
24836 calculate in unpredictable precision, return
24837 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
24838 excess precision if the target can't guarantee it will honor
24840 return (type
== EXCESS_PRECISION_TYPE_STANDARD
24841 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
24842 : FLT_EVAL_METHOD_UNPREDICTABLE
);
24843 case EXCESS_PRECISION_TYPE_FLOAT16
:
24845 && !(TARGET_SSE_MATH
&& TARGET_SSE
))
24846 error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>");
24847 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
;
24849 gcc_unreachable ();
24852 return FLT_EVAL_METHOD_UNPREDICTABLE
;
24855 /* Return true if _BitInt(N) is supported and fill its details into *INFO. */
24857 ix86_bitint_type_info (int n
, struct bitint_info
*info
)
24862 info
->limb_mode
= QImode
;
24864 info
->limb_mode
= HImode
;
24866 info
->limb_mode
= SImode
;
24868 info
->limb_mode
= DImode
;
24869 info
->big_endian
= false;
24870 info
->extended
= false;
24874 /* Implement PUSH_ROUNDING. On 386, we have pushw instruction that
24875 decrements by exactly 2 no matter what the position was, there is no pushb.
24877 But as CIE data alignment factor on this arch is -4 for 32bit targets
24878 and -8 for 64bit targets, we need to make sure all stack pointer adjustments
24879 are in multiple of 4 for 32bit targets and 8 for 64bit targets. */
24882 ix86_push_rounding (poly_int64 bytes
)
24884 return ROUND_UP (bytes
, UNITS_PER_WORD
);
/* Use 8 bits metadata start from bit48 for LAM_U48,
   6 bits metadat start from bit57 for LAM_U57.  */
#define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48		\
			   ? 48					\
			   : (ix86_lam_type == lam_u57 ? 57 : 0))
#define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48		\
			      ? 8				\
			      : (ix86_lam_type == lam_u57 ? 6 : 0))
24896 /* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES. */
24898 ix86_memtag_can_tag_addresses ()
24900 return ix86_lam_type
!= lam_none
&& TARGET_LP64
;
24903 /* Implement TARGET_MEMTAG_TAG_SIZE. */
24905 ix86_memtag_tag_size ()
24907 return IX86_HWASAN_TAG_SIZE
;
24910 /* Implement TARGET_MEMTAG_SET_TAG. */
24912 ix86_memtag_set_tag (rtx untagged
, rtx tag
, rtx target
)
24914 /* default_memtag_insert_random_tag may
24915 generate tag with value more than 6 bits. */
24916 if (ix86_lam_type
== lam_u57
)
24918 unsigned HOST_WIDE_INT and_imm
24919 = (HOST_WIDE_INT_1U
<< IX86_HWASAN_TAG_SIZE
) - 1;
24921 emit_insn (gen_andqi3 (tag
, tag
, GEN_INT (and_imm
)));
24923 tag
= expand_simple_binop (Pmode
, ASHIFT
, tag
,
24924 GEN_INT (IX86_HWASAN_SHIFT
), NULL_RTX
,
24925 /* unsignedp = */1, OPTAB_WIDEN
);
24926 rtx ret
= expand_simple_binop (Pmode
, IOR
, untagged
, tag
, target
,
24927 /* unsignedp = */1, OPTAB_DIRECT
);
24931 /* Implement TARGET_MEMTAG_EXTRACT_TAG. */
24933 ix86_memtag_extract_tag (rtx tagged_pointer
, rtx target
)
24935 rtx tag
= expand_simple_binop (Pmode
, LSHIFTRT
, tagged_pointer
,
24936 GEN_INT (IX86_HWASAN_SHIFT
), target
,
24937 /* unsignedp = */0,
24939 rtx ret
= gen_reg_rtx (QImode
);
24940 /* Mask off bit63 when LAM_U57. */
24941 if (ix86_lam_type
== lam_u57
)
24943 unsigned HOST_WIDE_INT and_imm
24944 = (HOST_WIDE_INT_1U
<< IX86_HWASAN_TAG_SIZE
) - 1;
24945 emit_insn (gen_andqi3 (ret
, gen_lowpart (QImode
, tag
),
24946 gen_int_mode (and_imm
, QImode
)));
24949 emit_move_insn (ret
, gen_lowpart (QImode
, tag
));
24953 /* The default implementation of TARGET_MEMTAG_UNTAGGED_POINTER. */
24955 ix86_memtag_untagged_pointer (rtx tagged_pointer
, rtx target
)
24957 /* Leave bit63 alone. */
24958 rtx tag_mask
= gen_int_mode (((HOST_WIDE_INT_1U
<< IX86_HWASAN_SHIFT
)
24959 + (HOST_WIDE_INT_1U
<< 63) - 1),
24961 rtx untagged_base
= expand_simple_binop (Pmode
, AND
, tagged_pointer
,
24962 tag_mask
, target
, true,
24964 gcc_assert (untagged_base
);
24965 return untagged_base
;
24968 /* Implement TARGET_MEMTAG_ADD_TAG. */
24970 ix86_memtag_add_tag (rtx base
, poly_int64 offset
, unsigned char tag_offset
)
24972 rtx base_tag
= gen_reg_rtx (QImode
);
24973 rtx base_addr
= gen_reg_rtx (Pmode
);
24974 rtx tagged_addr
= gen_reg_rtx (Pmode
);
24975 rtx new_tag
= gen_reg_rtx (QImode
);
24976 unsigned HOST_WIDE_INT and_imm
24977 = (HOST_WIDE_INT_1U
<< IX86_HWASAN_SHIFT
) - 1;
24979 /* When there's "overflow" in tag adding,
24980 need to mask the most significant bit off. */
24981 emit_move_insn (base_tag
, ix86_memtag_extract_tag (base
, NULL_RTX
));
24982 emit_move_insn (base_addr
,
24983 ix86_memtag_untagged_pointer (base
, NULL_RTX
));
24984 emit_insn (gen_add2_insn (base_tag
, gen_int_mode (tag_offset
, QImode
)));
24985 emit_move_insn (new_tag
, base_tag
);
24986 emit_insn (gen_andqi3 (new_tag
, new_tag
, gen_int_mode (and_imm
, QImode
)));
24987 emit_move_insn (tagged_addr
,
24988 ix86_memtag_set_tag (base_addr
, new_tag
, NULL_RTX
));
24989 return plus_constant (Pmode
, tagged_addr
, offset
);
24992 /* Target-specific selftests. */
24996 namespace selftest
{
24998 /* Verify that hard regs are dumped as expected (in compact mode). */
25001 ix86_test_dumping_hard_regs ()
25003 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode
, 0));
25004 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode
, 1));
25007 /* Test dumping an insn with repeated references to the same SCRATCH,
25008 to verify the rtx_reuse code. */
25011 ix86_test_dumping_memory_blockage ()
25013 set_new_first_and_last_insn (NULL
, NULL
);
25015 rtx pat
= gen_memory_blockage ();
25016 rtx_reuse_manager r
;
25017 r
.preprocess (pat
);
25019 /* Verify that the repeated references to the SCRATCH show use
25020 reuse IDS. The first should be prefixed with a reuse ID,
25021 and the second should be dumped as a "reuse_rtx" of that ID.
25022 The expected string assumes Pmode == DImode. */
25023 if (Pmode
== DImode
)
25024 ASSERT_RTL_DUMP_EQ_WITH_REUSE
25025 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
25027 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
25028 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat
, &r
);
25031 /* Verify loading an RTL dump; specifically a dump of copying
25032 a param on x86_64 from a hard reg into the frame.
25033 This test is target-specific since the dump contains target-specific
25037 ix86_test_loading_dump_fragment_1 ()
25039 rtl_dump_test
t (SELFTEST_LOCATION
,
25040 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
25042 rtx_insn
*insn
= get_insn_by_uid (1);
25044 /* The block structure and indentation here is purely for
25045 readability; it mirrors the structure of the rtx. */
25048 rtx pat
= PATTERN (insn
);
25049 ASSERT_EQ (SET
, GET_CODE (pat
));
25051 rtx dest
= SET_DEST (pat
);
25052 ASSERT_EQ (MEM
, GET_CODE (dest
));
25053 /* Verify the "/c" was parsed. */
25054 ASSERT_TRUE (RTX_FLAG (dest
, call
));
25055 ASSERT_EQ (SImode
, GET_MODE (dest
));
25057 rtx addr
= XEXP (dest
, 0);
25058 ASSERT_EQ (PLUS
, GET_CODE (addr
));
25059 ASSERT_EQ (DImode
, GET_MODE (addr
));
25061 rtx lhs
= XEXP (addr
, 0);
25062 /* Verify that the "frame" REG was consolidated. */
25063 ASSERT_RTX_PTR_EQ (frame_pointer_rtx
, lhs
);
25066 rtx rhs
= XEXP (addr
, 1);
25067 ASSERT_EQ (CONST_INT
, GET_CODE (rhs
));
25068 ASSERT_EQ (-4, INTVAL (rhs
));
25071 /* Verify the "[1 i+0 S4 A32]" was parsed. */
25072 ASSERT_EQ (1, MEM_ALIAS_SET (dest
));
25073 /* "i" should have been handled by synthesizing a global int
25074 variable named "i". */
25075 mem_expr
= MEM_EXPR (dest
);
25076 ASSERT_NE (mem_expr
, NULL
);
25077 ASSERT_EQ (VAR_DECL
, TREE_CODE (mem_expr
));
25078 ASSERT_EQ (integer_type_node
, TREE_TYPE (mem_expr
));
25079 ASSERT_EQ (IDENTIFIER_NODE
, TREE_CODE (DECL_NAME (mem_expr
)));
25080 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr
)));
25082 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest
));
25083 ASSERT_EQ (0, MEM_OFFSET (dest
));
25085 ASSERT_EQ (4, MEM_SIZE (dest
));
25087 ASSERT_EQ (32, MEM_ALIGN (dest
));
25090 rtx src
= SET_SRC (pat
);
25091 ASSERT_EQ (REG
, GET_CODE (src
));
25092 ASSERT_EQ (SImode
, GET_MODE (src
));
25093 ASSERT_EQ (5, REGNO (src
));
25094 tree reg_expr
= REG_EXPR (src
);
25095 /* "i" here should point to the same var as for the MEM_EXPR. */
25096 ASSERT_EQ (reg_expr
, mem_expr
);
25101 /* Verify that the RTL loader copes with a call_insn dump.
25102 This test is target-specific since the dump contains a target-specific
25106 ix86_test_loading_call_insn ()
25108 /* The test dump includes register "xmm0", where requires TARGET_SSE
25113 rtl_dump_test
t (SELFTEST_LOCATION
, locate_file ("x86_64/call-insn.rtl"));
25115 rtx_insn
*insn
= get_insns ();
25116 ASSERT_EQ (CALL_INSN
, GET_CODE (insn
));
25119 ASSERT_TRUE (RTX_FLAG (insn
, jump
));
25121 rtx pat
= PATTERN (insn
);
25122 ASSERT_EQ (CALL
, GET_CODE (SET_SRC (pat
)));
25124 /* Verify REG_NOTES. */
25126 /* "(expr_list:REG_CALL_DECL". */
25127 ASSERT_EQ (EXPR_LIST
, GET_CODE (REG_NOTES (insn
)));
25128 rtx_expr_list
*note0
= as_a
<rtx_expr_list
*> (REG_NOTES (insn
));
25129 ASSERT_EQ (REG_CALL_DECL
, REG_NOTE_KIND (note0
));
25131 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
25132 rtx_expr_list
*note1
= note0
->next ();
25133 ASSERT_EQ (REG_EH_REGION
, REG_NOTE_KIND (note1
));
25135 ASSERT_EQ (NULL
, note1
->next ());
25138 /* Verify CALL_INSN_FUNCTION_USAGE. */
25140 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
25141 rtx_expr_list
*usage
25142 = as_a
<rtx_expr_list
*> (CALL_INSN_FUNCTION_USAGE (insn
));
25143 ASSERT_EQ (EXPR_LIST
, GET_CODE (usage
));
25144 ASSERT_EQ (DFmode
, GET_MODE (usage
));
25145 ASSERT_EQ (USE
, GET_CODE (usage
->element ()));
25146 ASSERT_EQ (NULL
, usage
->next ());
25150 /* Verify that the RTL loader copes a dump from print_rtx_function.
25151 This test is target-specific since the dump contains target-specific
25155 ix86_test_loading_full_dump ()
25157 rtl_dump_test
t (SELFTEST_LOCATION
, locate_file ("x86_64/times-two.rtl"));
25159 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun
->decl
)));
25161 rtx_insn
*insn_1
= get_insn_by_uid (1);
25162 ASSERT_EQ (NOTE
, GET_CODE (insn_1
));
25164 rtx_insn
*insn_7
= get_insn_by_uid (7);
25165 ASSERT_EQ (INSN
, GET_CODE (insn_7
));
25166 ASSERT_EQ (PARALLEL
, GET_CODE (PATTERN (insn_7
)));
25168 rtx_insn
*insn_15
= get_insn_by_uid (15);
25169 ASSERT_EQ (INSN
, GET_CODE (insn_15
));
25170 ASSERT_EQ (USE
, GET_CODE (PATTERN (insn_15
)));
25172 /* Verify crtl->return_rtx. */
25173 ASSERT_EQ (REG
, GET_CODE (crtl
->return_rtx
));
25174 ASSERT_EQ (0, REGNO (crtl
->return_rtx
));
25175 ASSERT_EQ (SImode
, GET_MODE (crtl
->return_rtx
));
25178 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
25179 In particular, verify that it correctly loads the 2nd operand.
25180 This test is target-specific since these are machine-specific
25181 operands (and enums). */
25184 ix86_test_loading_unspec ()
25186 rtl_dump_test
t (SELFTEST_LOCATION
, locate_file ("x86_64/unspec.rtl"));
25188 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun
->decl
)));
25190 ASSERT_TRUE (cfun
);
25192 /* Test of an UNSPEC. */
25193 rtx_insn
*insn
= get_insns ();
25194 ASSERT_EQ (INSN
, GET_CODE (insn
));
25195 rtx set
= single_set (insn
);
25196 ASSERT_NE (NULL
, set
);
25197 rtx dst
= SET_DEST (set
);
25198 ASSERT_EQ (MEM
, GET_CODE (dst
));
25199 rtx src
= SET_SRC (set
);
25200 ASSERT_EQ (UNSPEC
, GET_CODE (src
));
25201 ASSERT_EQ (BLKmode
, GET_MODE (src
));
25202 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE
, XINT (src
, 1));
25204 rtx v0
= XVECEXP (src
, 0, 0);
25206 /* Verify that the two uses of the first SCRATCH have pointer
25208 rtx scratch_a
= XEXP (dst
, 0);
25209 ASSERT_EQ (SCRATCH
, GET_CODE (scratch_a
));
25211 rtx scratch_b
= XEXP (v0
, 0);
25212 ASSERT_EQ (SCRATCH
, GET_CODE (scratch_b
));
25214 ASSERT_EQ (scratch_a
, scratch_b
);
25216 /* Verify that the two mems are thus treated as equal. */
25217 ASSERT_TRUE (rtx_equal_p (dst
, v0
));
25219 /* Verify that the insn is recognized. */
25220 ASSERT_NE(-1, recog_memoized (insn
));
25222 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
25223 insn
= NEXT_INSN (insn
);
25224 ASSERT_EQ (INSN
, GET_CODE (insn
));
25226 set
= single_set (insn
);
25227 ASSERT_NE (NULL
, set
);
25229 src
= SET_SRC (set
);
25230 ASSERT_EQ (UNSPEC_VOLATILE
, GET_CODE (src
));
25231 ASSERT_EQ (UNSPECV_RDTSCP
, XINT (src
, 1));
25234 /* Run all target-specific selftests. */
25237 ix86_run_selftests (void)
25239 ix86_test_dumping_hard_regs ();
25240 ix86_test_dumping_memory_blockage ();
25242 /* Various tests of loading RTL dumps, here because they contain
25243 ix86-isms (e.g. names of hard regs). */
25244 ix86_test_loading_dump_fragment_1 ();
25245 ix86_test_loading_call_insn ();
25246 ix86_test_loading_full_dump ();
25247 ix86_test_loading_unspec ();
25250 } // namespace selftest
25252 #endif /* CHECKING_P */
/* Target-hook table, part 1: memory-return, address legitimization,
   attributes, builtins, scheduler and TLS hooks.  Each #undef/#define pair
   overrides a default from target-def.h before TARGET_INITIALIZER expands.
   NOTE(review): the fused "25255"-style numbers are extraction residue
   (original line numbers), and gaps in that numbering (e.g. 25268, 25312,
   25314-25315) suggest #else/#endif lines were dropped — TODO restore from
   upstream; code left byte-identical here.  */
25254 /* Initialize the GCC target structure. */
25255 #undef TARGET_RETURN_IN_MEMORY
25256 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
25258 #undef TARGET_LEGITIMIZE_ADDRESS
25259 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
25261 #undef TARGET_ATTRIBUTE_TABLE
25262 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25263 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
25264 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
/* NOTE(review): the matching #endif for this #if (orig. line 25268) appears
   to have been lost in extraction.  */
25265 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25266 # undef TARGET_MERGE_DECL_ATTRIBUTES
25267 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25270 #undef TARGET_INVALID_CONVERSION
25271 #define TARGET_INVALID_CONVERSION ix86_invalid_conversion
25273 #undef TARGET_INVALID_UNARY_OP
25274 #define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
25276 #undef TARGET_INVALID_BINARY_OP
25277 #define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
25279 #undef TARGET_COMP_TYPE_ATTRIBUTES
25280 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25282 #undef TARGET_INIT_BUILTINS
25283 #define TARGET_INIT_BUILTINS ix86_init_builtins
25284 #undef TARGET_BUILTIN_DECL
25285 #define TARGET_BUILTIN_DECL ix86_builtin_decl
25286 #undef TARGET_EXPAND_BUILTIN
25287 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25289 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25290 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25291 ix86_builtin_vectorized_function
25293 #undef TARGET_VECTORIZE_BUILTIN_GATHER
25294 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
25296 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
25297 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
25299 #undef TARGET_BUILTIN_RECIPROCAL
25300 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25302 #undef TARGET_ASM_FUNCTION_EPILOGUE
25303 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25305 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
25306 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
25307 ix86_print_patchable_function_entry
/* NOTE(review): this #ifndef's #else/#endif lines (orig. 25312, 25314) look
   dropped — as written the two #defines would conflict; confirm upstream.  */
25309 #undef TARGET_ENCODE_SECTION_INFO
25310 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25311 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25313 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25316 #undef TARGET_ASM_OPEN_PAREN
25317 #define TARGET_ASM_OPEN_PAREN ""
25318 #undef TARGET_ASM_CLOSE_PAREN
25319 #define TARGET_ASM_CLOSE_PAREN ""
25321 #undef TARGET_ASM_BYTE_OP
25322 #define TARGET_ASM_BYTE_OP ASM_BYTE
25324 #undef TARGET_ASM_ALIGNED_HI_OP
25325 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25326 #undef TARGET_ASM_ALIGNED_SI_OP
25327 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25329 #undef TARGET_ASM_ALIGNED_DI_OP
25330 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25333 #undef TARGET_PROFILE_BEFORE_PROLOGUE
25334 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
25336 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
25337 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
/* x86 tolerates unaligned data access, so unaligned ops reuse the aligned
   directives defined just above.  */
25339 #undef TARGET_ASM_UNALIGNED_HI_OP
25340 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25341 #undef TARGET_ASM_UNALIGNED_SI_OP
25342 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25343 #undef TARGET_ASM_UNALIGNED_DI_OP
25344 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25346 #undef TARGET_PRINT_OPERAND
25347 #define TARGET_PRINT_OPERAND ix86_print_operand
25348 #undef TARGET_PRINT_OPERAND_ADDRESS
25349 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
25350 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
25351 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
25352 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
25353 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
25355 #undef TARGET_SCHED_INIT_GLOBAL
25356 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
25357 #undef TARGET_SCHED_ADJUST_COST
25358 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25359 #undef TARGET_SCHED_ISSUE_RATE
25360 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25361 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25362 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25363 ia32_multipass_dfa_lookahead
25364 #undef TARGET_SCHED_MACRO_FUSION_P
25365 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
25366 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
25367 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
25369 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25370 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
25372 #undef TARGET_MEMMODEL_CHECK
25373 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
25375 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
25376 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
25379 #undef TARGET_HAVE_TLS
25380 #define TARGET_HAVE_TLS true
25382 #undef TARGET_CANNOT_FORCE_CONST_MEM
25383 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
25384 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
25385 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
25387 #undef TARGET_DELEGITIMIZE_ADDRESS
25388 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
25390 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
25391 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
25393 #undef TARGET_MS_BITFIELD_LAYOUT_P
25394 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
/* Target-hook table, part 2: symbol binding, thunks, costs, va_list and
   argument-passing hooks.
   NOTE(review): the darwin_binds_local_p / ix86_binds_local_p pair below is
   presumably selected by an #if TARGET_MACHO / #else / #endif whose
   conditional lines (numbering gap 25396/25399/25402) were lost in
   extraction — confirm against upstream; code left byte-identical.  */
25397 #undef TARGET_BINDS_LOCAL_P
25398 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
25400 #undef TARGET_BINDS_LOCAL_P
25401 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
/* NOTE(review): matching #endif (orig. line ~25406) also appears dropped.  */
25403 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25404 #undef TARGET_BINDS_LOCAL_P
25405 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
25408 #undef TARGET_ASM_OUTPUT_MI_THUNK
25409 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
25410 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
25411 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
25413 #undef TARGET_ASM_FILE_START
25414 #define TARGET_ASM_FILE_START x86_file_start
25416 #undef TARGET_OPTION_OVERRIDE
25417 #define TARGET_OPTION_OVERRIDE ix86_option_override
25419 #undef TARGET_REGISTER_MOVE_COST
25420 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
25421 #undef TARGET_MEMORY_MOVE_COST
25422 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
25423 #undef TARGET_RTX_COSTS
25424 #define TARGET_RTX_COSTS ix86_rtx_costs
25425 #undef TARGET_ADDRESS_COST
25426 #define TARGET_ADDRESS_COST ix86_address_cost
25428 #undef TARGET_OVERLAP_OP_BY_PIECES_P
25429 #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
25431 #undef TARGET_FLAGS_REGNUM
25432 #define TARGET_FLAGS_REGNUM FLAGS_REG
25433 #undef TARGET_FIXED_CONDITION_CODE_REGS
25434 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
25435 #undef TARGET_CC_MODES_COMPATIBLE
25436 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
25438 #undef TARGET_MACHINE_DEPENDENT_REORG
25439 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
25441 #undef TARGET_BUILD_BUILTIN_VA_LIST
25442 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
25444 #undef TARGET_FOLD_BUILTIN
25445 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
25447 #undef TARGET_GIMPLE_FOLD_BUILTIN
25448 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
/* Multi-versioning (target_clones / function versioning) hooks.  */
25450 #undef TARGET_COMPARE_VERSION_PRIORITY
25451 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
25453 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
25454 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
25455 ix86_generate_version_dispatcher_body
25457 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
25458 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
25459 ix86_get_function_versions_dispatcher
25461 #undef TARGET_ENUM_VA_LIST_P
25462 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
25464 #undef TARGET_FN_ABI_VA_LIST
25465 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
25467 #undef TARGET_CANONICAL_VA_LIST_TYPE
25468 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
25470 #undef TARGET_EXPAND_BUILTIN_VA_START
25471 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
25473 #undef TARGET_MD_ASM_ADJUST
25474 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
25476 #undef TARGET_C_EXCESS_PRECISION
25477 #define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
25478 #undef TARGET_C_BITINT_TYPE_INFO
25479 #define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info
25480 #undef TARGET_PROMOTE_PROTOTYPES
25481 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
25482 #undef TARGET_PUSH_ARGUMENT
25483 #define TARGET_PUSH_ARGUMENT ix86_push_argument
25484 #undef TARGET_SETUP_INCOMING_VARARGS
25485 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
25486 #undef TARGET_MUST_PASS_IN_STACK
25487 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
25488 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
25489 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
25490 #undef TARGET_FUNCTION_ARG_ADVANCE
25491 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
25492 #undef TARGET_FUNCTION_ARG
25493 #define TARGET_FUNCTION_ARG ix86_function_arg
25494 #undef TARGET_INIT_PIC_REG
25495 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
25496 #undef TARGET_USE_PSEUDO_PIC_REG
25497 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
25498 #undef TARGET_FUNCTION_ARG_BOUNDARY
25499 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
25500 #undef TARGET_PASS_BY_REFERENCE
25501 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
25502 #undef TARGET_INTERNAL_ARG_POINTER
25503 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
25504 #undef TARGET_UPDATE_STACK_BOUNDARY
25505 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
25506 #undef TARGET_GET_DRAP_RTX
25507 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
25508 #undef TARGET_STRICT_ARGUMENT_NAMING
25509 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
25510 #undef TARGET_STATIC_CHAIN
25511 #define TARGET_STATIC_CHAIN ix86_static_chain
25512 #undef TARGET_TRAMPOLINE_INIT
25513 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
25514 #undef TARGET_RETURN_POPS_ARGS
25515 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
/* Target-hook table, part 3: diagnostics, mode support, reload/register
   classes, vectorizer costs and per-function option handling.
   NOTE(review): fused "25517"-style prefixes are extraction residue; the
   #ifdef SUBTARGET_INSERT_ATTRIBUTES below is presumably closed by an
   #endif (numbering gap at 25550) lost in extraction.  Code byte-identical.  */
25517 #undef TARGET_WARN_FUNC_RETURN
25518 #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
25520 #undef TARGET_LEGITIMATE_COMBINED_INSN
25521 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
25523 #undef TARGET_ASAN_SHADOW_OFFSET
25524 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
25526 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
25527 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
25529 #undef TARGET_SCALAR_MODE_SUPPORTED_P
25530 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
25532 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
25533 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
25534 ix86_libgcc_floating_mode_supported_p
25536 #undef TARGET_VECTOR_MODE_SUPPORTED_P
25537 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
25539 #undef TARGET_C_MODE_FOR_SUFFIX
25540 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
25543 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
25544 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
25547 #ifdef SUBTARGET_INSERT_ATTRIBUTES
25548 #undef TARGET_INSERT_ATTRIBUTES
25549 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
25552 #undef TARGET_MANGLE_TYPE
25553 #define TARGET_MANGLE_TYPE ix86_mangle_type
25555 #undef TARGET_EMIT_SUPPORT_TINFOS
25556 #define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos
25558 #undef TARGET_STACK_PROTECT_GUARD
25559 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
25562 #undef TARGET_STACK_PROTECT_FAIL
25563 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
25566 #undef TARGET_FUNCTION_VALUE
25567 #define TARGET_FUNCTION_VALUE ix86_function_value
25569 #undef TARGET_FUNCTION_VALUE_REGNO_P
25570 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
25572 #undef TARGET_ZERO_CALL_USED_REGS
25573 #define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
25575 #undef TARGET_PROMOTE_FUNCTION_MODE
25576 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
25578 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
25579 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
25581 #undef TARGET_MEMBER_TYPE_FORCES_BLK
25582 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
25584 #undef TARGET_INSTANTIATE_DECLS
25585 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
25587 #undef TARGET_SECONDARY_RELOAD
25588 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
25589 #undef TARGET_SECONDARY_MEMORY_NEEDED
25590 #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
25591 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
25592 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
25594 #undef TARGET_CLASS_MAX_NREGS
25595 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
25597 #undef TARGET_PREFERRED_RELOAD_CLASS
25598 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
25599 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
25600 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
25601 #undef TARGET_CLASS_LIKELY_SPILLED_P
25602 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
25604 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
25605 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
25606 ix86_builtin_vectorization_cost
25607 #undef TARGET_VECTORIZE_VEC_PERM_CONST
25608 #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
25609 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
25610 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
25611 ix86_preferred_simd_mode
25612 #undef TARGET_VECTORIZE_SPLIT_REDUCTION
25613 #define TARGET_VECTORIZE_SPLIT_REDUCTION \
25614 ix86_split_reduction
25615 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
25616 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
25617 ix86_autovectorize_vector_modes
25618 #undef TARGET_VECTORIZE_GET_MASK_MODE
25619 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
25620 #undef TARGET_VECTORIZE_CREATE_COSTS
25621 #define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs
/* Per-function target-option save/restore (attribute target support).  */
25623 #undef TARGET_SET_CURRENT_FUNCTION
25624 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
25626 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
25627 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
25629 #undef TARGET_OPTION_SAVE
25630 #define TARGET_OPTION_SAVE ix86_function_specific_save
25632 #undef TARGET_OPTION_RESTORE
25633 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
25635 #undef TARGET_OPTION_POST_STREAM_IN
25636 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
25638 #undef TARGET_OPTION_PRINT
25639 #define TARGET_OPTION_PRINT ix86_function_specific_print
25641 #undef TARGET_OPTION_FUNCTION_VERSIONS
25642 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
25644 #undef TARGET_CAN_INLINE_P
25645 #define TARGET_CAN_INLINE_P ix86_can_inline_p
25647 #undef TARGET_LEGITIMATE_ADDRESS_P
25648 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
25650 #undef TARGET_REGISTER_PRIORITY
25651 #define TARGET_REGISTER_PRIORITY ix86_register_priority
25653 #undef TARGET_REGISTER_USAGE_LEVELING_P
25654 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
25656 #undef TARGET_LEGITIMATE_CONSTANT_P
25657 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
25659 #undef TARGET_COMPUTE_FRAME_LAYOUT
25660 #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
25662 #undef TARGET_FRAME_POINTER_REQUIRED
25663 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
25665 #undef TARGET_CAN_ELIMINATE
25666 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
/* Target-hook table, part 4: liveness, SIMD clones, mode switching,
   ifcvt costs, hard-reg properties and memory tagging.
   NOTE(review): fused "25668"-style prefixes are extraction residue;
   code left byte-identical, comments only added.  */
25668 #undef TARGET_EXTRA_LIVE_ON_ENTRY
25669 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
25671 #undef TARGET_ASM_CODE_END
25672 #define TARGET_ASM_CODE_END ix86_code_end
25674 #undef TARGET_CONDITIONAL_REGISTER_USAGE
25675 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
25677 #undef TARGET_CANONICALIZE_COMPARISON
25678 #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
25680 #undef TARGET_LOOP_UNROLL_ADJUST
25681 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
/* The spill-class hook is intentionally kept disabled — see the PRs cited
   in the original comment below.  */
25683 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
25684 #undef TARGET_SPILL_CLASS
25685 #define TARGET_SPILL_CLASS ix86_spill_class
25687 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
25688 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
25689 ix86_simd_clone_compute_vecsize_and_simdlen
25691 #undef TARGET_SIMD_CLONE_ADJUST
25692 #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
25694 #undef TARGET_SIMD_CLONE_USABLE
25695 #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
25697 #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
25698 #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
25700 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
25701 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
25702 ix86_float_exceptions_rounding_supported_p
/* Mode-switching (optimize_mode_switching) hooks.  */
25704 #undef TARGET_MODE_EMIT
25705 #define TARGET_MODE_EMIT ix86_emit_mode_set
25707 #undef TARGET_MODE_NEEDED
25708 #define TARGET_MODE_NEEDED ix86_mode_needed
25710 #undef TARGET_MODE_AFTER
25711 #define TARGET_MODE_AFTER ix86_mode_after
25713 #undef TARGET_MODE_ENTRY
25714 #define TARGET_MODE_ENTRY ix86_mode_entry
25716 #undef TARGET_MODE_EXIT
25717 #define TARGET_MODE_EXIT ix86_mode_exit
25719 #undef TARGET_MODE_PRIORITY
25720 #define TARGET_MODE_PRIORITY ix86_mode_priority
25722 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
25723 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
25725 #undef TARGET_OFFLOAD_OPTIONS
25726 #define TARGET_OFFLOAD_OPTIONS \
25727 ix86_offload_options
25729 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
25730 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
25732 #undef TARGET_OPTAB_SUPPORTED_P
25733 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
25735 #undef TARGET_HARD_REGNO_SCRATCH_OK
25736 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
25738 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
25739 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
25741 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
25742 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
25744 #undef TARGET_INIT_LIBFUNCS
25745 #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
25747 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
25748 #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
25750 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
25751 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
25753 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
25754 #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
25756 #undef TARGET_HARD_REGNO_NREGS
25757 #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
25758 #undef TARGET_HARD_REGNO_MODE_OK
25759 #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
25761 #undef TARGET_MODES_TIEABLE_P
25762 #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
25764 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
25765 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
25766 ix86_hard_regno_call_part_clobbered
25768 #undef TARGET_INSN_CALLEE_ABI
25769 #define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi
25771 #undef TARGET_CAN_CHANGE_MODE_CLASS
25772 #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
25774 #undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
25775 #define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment
25777 #undef TARGET_STATIC_RTX_ALIGNMENT
25778 #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
25779 #undef TARGET_CONSTANT_ALIGNMENT
25780 #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
25782 #undef TARGET_EMPTY_RECORD_P
25783 #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
25785 #undef TARGET_WARN_PARAMETER_PASSING_ABI
25786 #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
25788 #undef TARGET_GET_MULTILIB_ABI_NAME
25789 #define TARGET_GET_MULTILIB_ABI_NAME \
25790 ix86_get_multilib_abi_name
25792 #undef TARGET_IFUNC_REF_LOCAL_OK
25793 #define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok
/* NOTE(review): #endif for this #if (numbering gap at 25798-25799)
   appears dropped in extraction.  */
25795 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
25796 # undef TARGET_ASM_RELOC_RW_MASK
25797 # define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
/* HWASAN-style memory-tagging hooks.  */
25800 #undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
25801 #define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses
25803 #undef TARGET_MEMTAG_ADD_TAG
25804 #define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag
25806 #undef TARGET_MEMTAG_SET_TAG
25807 #define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag
25809 #undef TARGET_MEMTAG_EXTRACT_TAG
25810 #define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag
25812 #undef TARGET_MEMTAG_UNTAGGED_POINTER
25813 #define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer
25815 #undef TARGET_MEMTAG_TAG_SIZE
25816 #define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size
/* Returns whether the libc in use provides a fast implementation of the
   built-in identified by FCODE; the visible branch returns true for
   BUILT_IN_MEMPCPY when compiled against glibc (OPTION_GLIBC).
   NOTE(review): garbled extraction — the return-type line, braces and the
   #else/#endif of the OPTION_GLIBC conditional (numbering gaps 25818,
   25820, 25824-25830) were dropped; code left byte-identical.  */
25819 ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED
)
25821 #ifdef OPTION_GLIBC
25823 return (built_in_function
)fcode
== BUILT_IN_MEMPCPY
;
25831 #undef TARGET_LIBC_HAS_FAST_FUNCTION
25832 #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
/* Reports the maximum error (in ulps) of the libm function CFN for MODE.
   On glibc it combines glibc_linux_libm_function_max_error with a libmvec
   bound (taking the MAX); otherwise it defers to
   default_libm_function_max_error.
   NOTE(review): garbled extraction — the return-type line, a boundary_p
   parameter line, braces and interior switch/case lines (numbering gaps
   e.g. 25852-25859, 25870-25875) were dropped; code left byte-identical.  */
25835 ix86_libm_function_max_error (unsigned cfn
, machine_mode mode
,
25838 #ifdef OPTION_GLIBC
25839 bool glibc_p
= OPTION_GLIBC
;
/* presumably the #else branch of the OPTION_GLIBC conditional — the
   #else/#endif lines (25840, 25842) appear dropped; confirm upstream.  */
25841 bool glibc_p
= false;
25845 /* If __FAST_MATH__ is defined, glibc provides libmvec. */
25846 unsigned int libmvec_ret
= 0;
/* Libmvec bound applies only under fast-math-style flags.  */
25847 if (!flag_trapping_math
25848 && flag_unsafe_math_optimizations
25849 && flag_finite_math_only
25850 && !flag_signed_zeros
25851 && !flag_errno_math
)
25860 /* With non-default rounding modes, libmvec provides
25861 complete garbage in results. E.g.
25862 _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
25863 returns 0.00333309174f rather than 1.40129846e-45f. */
25864 if (flag_rounding_math
)
25866 /* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
25867 claims libmvec maximum error is 4ulps.
25868 My own random testing indicates 2ulps for SFmode and
25869 0.5ulps for DFmode, but let's go with the 4ulps. */
25876 unsigned int ret
= glibc_linux_libm_function_max_error (cfn
, mode
,
25878 return MAX (ret
, libmvec_ret
);
25880 return default_libm_function_max_error (cfn
, mode
, boundary_p
);
25883 #undef TARGET_LIBM_FUNCTION_MAX_ERROR
25884 #define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error
/* Register the selftest driver (only under CHECKING_P builds, per the
   #endif below), then instantiate the target vector: TARGET_INITIALIZER
   expands every TARGET_* macro defined above into the targetm struct.  */
25887 #undef TARGET_RUN_TARGET_SELFTESTS
25888 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
25889 #endif /* #if CHECKING_P */
25891 struct gcc_target targetm
= TARGET_INITIALIZER
;
/* Garbage-collector root tables generated by gengtype for this file.  */
25893 #include "gt-i386.h"