/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2024 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#define INCLUDE_STRING
#define IN_TARGET_CODE 1

#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "tree-pass.h"
#include "pass_manager.h"
#include "target-globals.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "tree-iterator.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "tree-ssanames.h"
#include "selftest-rtl.h"
#include "print-rtl.h"
#include "symbol-summary.h"
#include "ipa-fnsummary.h"
#include "wide-int-bitmask.h"
#include "tree-vector-builder.h"
#include "dwarf2out.h"
#include "i386-options.h"
#include "i386-builtins.h"
#include "i386-expand.h"
#include "i386-features.h"
#include "function-abi.h"
#include "rtl-error.h"

/* This file should be included last.  */
#include "target-def.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
static void ix86_emit_restore_reg_using_pop (rtx, bool = false);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)					\
  ((mode) == QImode ? 0						\
   : (mode) == HImode ? 1					\
   : (mode) == SImode ? 2					\
   : (mode) == DImode ? 3					\
   : 4)
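/* For example, a cost lookup such as

     ix86_cost->mult_init[MODE_INDEX (SImode)]

   picks the SImode entry (index 2) of a per-mode cost table; any mode
   other than QI/HI/SI/DImode falls through to the final index 4.  */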
const struct processor_costs *ix86_tune_cost = NULL;

/* Set by -mtune or -Os.  */
const struct processor_costs *ix86_cost = NULL;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  AREG, DREG, CREG, BREG,
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer, flags, fpsr, frame */
  NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* AVX-512 SSE registers */
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  /* Mask registers.  */
  ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
};
/* The "default" register map used in 32bit mode.  */

int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,
  12, 13, 14, 15, 16, 17, 18, 19,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  21, 22, 23, 24, 25, 26, 27, 28,
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* Mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};
/* The "default" register map used in 64bit mode.  */

int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,
  33, 34, 35, 36, 37, 38, 39, 40,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  17, 18, 19, 20, 21, 22, 23, 24,
  41, 42, 43, 44, 45, 46, 47, 48,
  /* extended integer registers */
  8, 9, 10, 11, 12, 13, 14, 15,
  /* extended SSE registers */
  25, 26, 27, 28, 29, 30, 31, 32,
  /* AVX-512 registers 16-23 */
  67, 68, 69, 70, 71, 72, 73, 74,
  /* AVX-512 registers 24-31 */
  75, 76, 77, 78, 79, 80, 81, 82,
  /* Mask registers */
  118, 119, 120, 121, 122, 123, 124, 125,
  /* REX2 extended integer registers */
  130, 131, 132, 133, 134, 135, 136, 137,
  138, 139, 140, 141, 142, 143, 144, 145
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believed these numbers had these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 was so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I had does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seemed to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still printed garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB printed for various FP stack regs
   when doing an `x' command were all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)  */
int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,
  11, 12, 13, 14, 15, 16, 17, 18,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, 9,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  21, 22, 23, 24, 25, 26, 27, 28,
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* Mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};
/* Define parameter passing and return registers.  */

static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
/* Define the structure for the machine field in struct function.  */

struct GTY(()) stack_local_entry
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char ix86_prefetch_sse;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* True if there is no direct access to extern symbols.  */
bool ix86_has_no_direct_extern_access;

/* Calling abi specific va_list type nodes.  */
tree sysv_va_list_type_node;
tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Fence to use after loop using movnt.  */
tree x86_mfence;
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSEHF_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

#define MAX_CLASSES 8

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table[5];
static bool ext_80387_constants_init;
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (void);
static tree ix86_canonical_va_list_type (tree);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

static bool ix86_can_inline_p (tree, tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

/* Whether -mtune= or -march= were specified.  */
int ix86_tune_defaulted;
int ix86_arch_specified;
/* Return true if a red-zone is in use.  We can't use red-zone when
   there are local indirect jumps, like "indirect_jump" or "tablejump",
   which jumps to another place in the function, since "call" in the
   indirect thunk pushes the return address onto stack, destroying
   red-zone.

   TODO: If we can reserve the first 2 WORDs, for PUSH and, another
   for CALL, in red-zone, we can allow local indirect jumps with
   indirect thunk.  */

bool
ix86_using_red_zone (void)
{
  return (TARGET_RED_ZONE
	  && !TARGET_64BIT_MS_ABI
	  && (!cfun->machine->has_local_indirect_jump
	      || cfun->machine->indirect_branch_type == indirect_branch_keep));
}
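/* For example, in a 64-bit SysV leaf function the red zone lets locals
   live below the stack pointer without any prologue adjustment:

     movl  %edi, -4(%rsp)

   This is only safe because the conditions tested above guarantee that
   nothing (such as an indirect-thunk "call") will push onto the stack
   and clobber the 128 bytes below %rsp.  */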
/* Return true, if profiling code should be emitted before
   prologue.  Otherwise it returns false.
   Note: For x86 with the hotpatch ("hotfix") convention this is
   required.  */
static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}
/* Update register usage after having seen the compiler flags.  */

static void
ix86_conditional_register_usage (void)
{
  int i, c_mask;

  /* If there are no caller-saved registers, preserve all registers,
     except fixed_regs and registers used for function return value
     since aggregate_value_p checks call_used_regs[regno] on return
     value.  */
  if (cfun
      && (cfun->machine->call_saved_registers
	  == TYPE_NO_CALLER_SAVED_REGISTERS))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
	call_used_regs[i] = 0;

  /* For 32-bit targets, disable the REX registers.  */
  if (!TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
    }

  /* See the definition of CALL_USED_REGISTERS in i386.h.  */
  c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);

  CLEAR_HARD_REG_SET (reg_class_contents[(int) CLOBBERED_REGS]);

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* Set/reset conditionally defined registers from
	 CALL_USED_REGISTERS initializer.  */
      if (call_used_regs[i] > 1)
	call_used_regs[i] = !!(call_used_regs[i] & c_mask);

      /* Calculate registers of CLOBBERED_REGS register set
	 as call used registers from GENERAL_REGS register set.  */
      if (TEST_HARD_REG_BIT (reg_class_contents[(int) GENERAL_REGS], i)
	  && call_used_regs[i])
	SET_HARD_REG_BIT (reg_class_contents[(int) CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, disable the registers.  */
  if (!TARGET_MMX)
    accessible_reg_set &= ~reg_class_contents[MMX_REGS];

  /* If SSE is disabled, disable the registers.  */
  if (!TARGET_SSE)
    accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];

  /* If the FPU is disabled, disable the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];

  /* If AVX512F is disabled, disable the registers.  */
  if (! TARGET_AVX512F)
    {
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);

      accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
    }

  /* If APX is disabled, disable the registers.  */
  if (! (TARGET_APX_EGPR && TARGET_64BIT))
    for (i = FIRST_REX2_INT_REG; i <= LAST_REX2_INT_REG; i++)
      CLEAR_HARD_REG_BIT (accessible_reg_set, i);
}
/* Canonicalize a comparison from one we don't have to one we do have.  */

static void
ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			      bool op0_preserve_value)
{
  /* The order of operands in x87 ficom compare is forced by combine in
     simplify_comparison () function.  Float operator is treated as RTX_OBJ
     with a precedence over other operators and is always put in the first
     place.  Swap condition and operands to match ficom instruction.  */
  if (!op0_preserve_value
      && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
    {
      enum rtx_code scode = swap_condition ((enum rtx_code) *code);

      /* We are called only for compares that are split to SAHF instruction.
	 Ensure that we have setcc/jcc insn for the swapped condition.  */
      if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
	{
	  *code = (int) scode;
	  std::swap (*op0, *op1);
	}
    }
}
/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* Changes of those flags can be tolerated for always inlines.  Lets hope
     user knows what he is doing.  */
  unsigned HOST_WIDE_INT always_inline_safe_mask
    = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
       | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
       | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
       | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
       | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
       | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
       | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);

  if (!callee_tree)
    callee_tree = target_option_default_node;
  if (!caller_tree)
    caller_tree = target_option_default_node;
  if (callee_tree == caller_tree)
    return true;

  struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  bool ret = false;
  bool always_inline
    = (DECL_DISREGARD_INLINE_LIMITS (callee)
       && lookup_attribute ("always_inline",
			    DECL_ATTRIBUTES (callee)));

  /* If callee only uses GPRs, ignore MASK_80387.  */
  if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
    always_inline_safe_mask |= MASK_80387;

  cgraph_node *callee_node = cgraph_node::get (callee);
  /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
     function can inline a SSE2 function but a SSE2 function can't inline
     a SSE4 function.  */
  if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
       != callee_opts->x_ix86_isa_flags)
      || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
	  != callee_opts->x_ix86_isa_flags2))
    ret = false;

  /* See if we have the same non-isa options.  */
  else if ((!always_inline
	    && caller_opts->x_target_flags != callee_opts->x_target_flags)
	   || (caller_opts->x_target_flags & ~always_inline_safe_mask)
	      != (callee_opts->x_target_flags & ~always_inline_safe_mask))
    ret = false;

  else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
	   /* If the callee doesn't use FP expressions, differences in
	      ix86_fpmath can be ignored.  We are called from FEs
	      for multi-versioning call optimization, so beware of
	      ipa_fn_summaries not available.  */
	   && (! ipa_fn_summaries
	       || ipa_fn_summaries->get (callee_node) == NULL
	       || ipa_fn_summaries->get (callee_node)->fp_expressions))
    ret = false;

  /* At this point we cannot identify whether arch or tune setting
     comes from target attribute or not.  So the most conservative way
     is to allow the callee that uses default arch and tune string to
     be inlined.  */
  else if (!strcmp (callee_opts->x_ix86_arch_string, "x86-64")
	   && !strcmp (callee_opts->x_ix86_tune_string, "generic"))
    ret = true;

  /* See if arch, tune, etc. are the same.  As previous ISA flags already
     checks if callee's ISA is subset of caller's, do not block
     always_inline attribute for callee even if it has different arch.  */
  else if (!always_inline && caller_opts->arch != callee_opts->arch)
    ret = false;

  else if (!always_inline && caller_opts->tune != callee_opts->tune)
    ret = false;

  else if (!always_inline
	   && caller_opts->branch_cost != callee_opts->branch_cost)
    ret = false;

  else
    ret = true;

  return ret;
}
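/* For example, with plain -msse2 a callee declared as

     __attribute__((target ("avx2"))) static int f (void);

   cannot be inlined into an unannotated caller, because the callee's ISA
   flags are not a subset of the caller's; the always_inline exemption
   above applies only to the non-ISA flags collected in
   always_inline_safe_mask and to arch/tune/branch_cost differences.  */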
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC
      && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC)
    return false;

  if (exp == NULL_TREE)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  /* Automatic variables are never large data.  */
  if (VAR_P (exp) && !is_global_var (exp))
    return false;

  if (VAR_P (exp) && DECL_SECTION_NAME (exp))
    {
      const char *section = DECL_SECTION_NAME (exp);
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  Also,
	 int_size_in_bytes returns -1 if size can vary or is larger than
	 an integer in which case also it is safer to assume that it goes in
	 large data.  */
      if (size <= 0 || size > ix86_section_threshold)
	return true;
    }

  return false;
}
/* i386-specific section flag to mark large sections.  */
#define SECTION_LARGE SECTION_MACH_DEP
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

ATTRIBUTE_UNUSED static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if (ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE | SECTION_LARGE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags &= ~SECTION_WRITE;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
/* Select a set of attributes for section NAME based on the properties
   of DECL and whether or not RELOC indicates that DECL's initializer
   might contain runtime relocations.  */

static unsigned int ATTRIBUTE_UNUSED
x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (ix86_in_large_data_p (decl))
    flags |= SECTION_LARGE;

  if (decl == NULL_TREE
      && (strcmp (name, ".ldata.rel.ro") == 0
	  || strcmp (name, ".ldata.rel.ro.local") == 0))
    flags |= SECTION_RELRO;

  if (strcmp (name, ".lbss") == 0
      || startswith (name, ".lbss.")
      || startswith (name, ".gnu.linkonce.lb."))
    flags |= SECTION_BSS;

  return flags;
}
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if (ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  set_decl_section_name (decl, string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
#ifndef LARGECOMM_SECTION_ASM_OP
#define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
#endif
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for
   large objects.  */
void
x86_elf_aligned_decl_common (FILE *file, tree decl,
			     const char *name, unsigned HOST_WIDE_INT size,
			     unsigned align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
       || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
      && size > (unsigned int) ix86_section_threshold)
    {
      switch_to_section (get_named_section (decl, ".lbss", 0));
      fputs (LARGECOMM_SECTION_ASM_OP, file);
    }
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl, const char *name,
			unsigned HOST_WIDE_INT size, unsigned align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
       || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
      && size > (unsigned int) ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;
  bool bind_global = decl && !targetm.binds_local_p (decl);

  if (ix86_function_naked (current_function_decl))
    return false;

  /* Sibling call isn't OK if there are no caller-saved registers
     since all registers must be preserved before return.  */
  if (cfun->machine->call_saved_registers
      == TYPE_NO_CALLER_SAVED_REGISTERS)
    return false;

  /* If we are generating position-independent code, we cannot sibcall
     optimize direct calls to global functions, as the PLT requires
     %ebx be live.  (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && flag_plt
      && bind_global)
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);	/* pointer expression */
      type = TREE_TYPE (type);		/* pointer type */
      type = TREE_TYPE (type);		/* function type */
      decl_or_type = type;
    }

  /* Sibling call isn't OK if callee has no callee-saved registers
     and the calling function has callee-saved registers.  */
  if ((cfun->machine->call_saved_registers
       != TYPE_NO_CALLEE_SAVED_REGISTERS)
      && lookup_attribute ("no_callee_saved_registers",
			   TYPE_ATTRIBUTES (type)))
    return false;

  /* If outgoing reg parm stack space changes, we cannot do sibcall.  */
  if ((OUTGOING_REG_PARM_STACK_SPACE (type)
       != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
      || (REG_PARM_STACK_SPACE (decl_or_type)
	  != REG_PARM_STACK_SPACE (current_function_decl)))
    {
      maybe_complain_about_tail_call (exp,
				      "inconsistent size of stack space"
				      " allocated for arguments which are"
				      " passed in registers");
      return false;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions and call to global
	 function via GOT slot are indirect.  */
      if (!decl
	  || (bind_global && flag_pic && !flag_plt)
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
	  || flag_force_indirect_call)
	{
	  /* Check if regparm >= 3 since arg_reg_available is set to
	     false if regparm == 0.  If regparm is 1 or 2, there is
	     always a call-clobbered register available.

	     ??? The symbol indirect call doesn't need a call-clobbered
	     register.  But we don't know if this is a symbol indirect
	     call or not here.  */
	  if (ix86_function_regparm (type, decl) >= 3
	      && !cfun->machine->arg_reg_available)
	    return false;
	}
    }

  if (decl && ix86_use_pseudo_pic_reg ())
    {
      /* When PIC register is used, it must be restored after ifunc
	 function returns.  */
      cgraph_node *node = cgraph_node::get (decl);
      if (node && node->ifunc_resolver)
	return false;
    }

  /* Disable sibcall if callee has indirect_return attribute and
     caller doesn't since callee will return to the caller's caller
     via an indirect jump.  */
  if (((flag_cf_protection & (CF_RETURN | CF_BRANCH))
       == (CF_RETURN | CF_BRANCH))
      && lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (type))
      && !lookup_attribute ("indirect_return",
			    TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
    return false;

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
/* This function determines from TYPE the calling-convention.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* Regparam isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      if (IX86_BASE_CALLCVT (ret) != 0)
	return ret;
    }

  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
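/* For example, on ia32

     void __attribute__((stdcall)) f (int);

   yields IX86_CALLCVT_STDCALL here, while an unattributed prototype
   yields IX86_CALLCVT_CDECL -- or IX86_CALLCVT_STDCALL under -mrtd,
   provided the function is not variadic.  */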
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  if (lookup_attribute ("no_callee_saved_registers",
			TYPE_ATTRIBUTES (type1))
      != lookup_attribute ("no_callee_saved_registers",
			   TYPE_ATTRIBUTES (type2)))
    return 0;

  return 1;
}
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL)
    {
      cgraph_node *target = cgraph_node::get (decl);
      if (target)
	target = target->function_symbol ();

      /* Caller and callee must agree on the calling convention, so
	 checking here just optimize means that with
	 __attribute__((optimize (...))) caller could use regparm convention
	 and callee not, or vice versa.  Instead look at whether the callee
	 is optimized or not.  */
      if (target && opt_for_fn (target->decl, optimize)
	  && !(profile_flag && !flag_fentry))
	{
	  if (target->local && target->can_change_signature)
	    {
	      int local_regparm, globals = 0, regno;

	      /* Make sure no regparm register is taken by a
		 fixed register variable.  */
	      for (local_regparm = 0; local_regparm < REGPARM_MAX;
		   local_regparm++)
		if (fixed_regs[local_regparm])
		  break;

	      /* We don't want to use regparm(3) for nested functions as
		 these use a static chain pointer in the third argument.  */
	      if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
		local_regparm = 2;

	      /* Save a register for the split stack.  */
	      if (flag_split_stack)
		{
		  if (local_regparm == 3)
		    local_regparm = 2;
		  else if (local_regparm == 2
			   && DECL_STATIC_CHAIN (target->decl))
		    local_regparm = 1;
		}

	      /* Each fixed register usage increases register pressure,
		 so less registers should be used for argument passing.
		 This functionality can be overriden by an explicit
		 regparm value.  */
	      for (regno = AX_REG; regno <= DI_REG; regno++)
		if (fixed_regs[regno])
		  globals++;

	      local_regparm
		= globals < local_regparm ? local_regparm - globals : 0;

	      if (local_regparm > regparm)
		regparm = local_regparm;
	    }
	}
    }

  return regparm;
}
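/* For example, on ia32

     int __attribute__((regparm (3))) add3 (int a, int b, int c);

   receives a, b and c in %eax, %edx and %ecx instead of on the stack;
   the local-function path above applies the same promotion to static
   functions automatically when it is safe to change their signature.  */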
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Return -1 if any FP parameter
   should be rejected by error.  This is used in the situation where we
   imply the SSE calling convention but the function is called from
   another function with SSE disabled.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  if (!decl)
    return 0;

  cgraph_node *target = cgraph_node::get (decl);
  if (target)
    target = target->function_symbol ();

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (target
      /* TARGET_SSE_MATH */
      && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
      && opt_for_fn (target->decl, optimize)
      && !(profile_flag && !flag_fentry))
    {
      if (target->local && target->can_change_signature)
	{
	  /* Refuse to produce wrong code when local function with SSE enabled
	     is called from SSE disabled function.
	     FIXME: We need a way to detect these cases cross-ltrans partition
	     and avoid using SSE calling conventions on local functions called
	     from function with SSE disabled.  For now at least delay the
	     warning until we know we are going to produce wrong code.  */
	  if (!TARGET_SSE && warn)
	    return -1;
	  return TARGET_SSE2_P (target_opts_for_fn (target->decl)
				->x_ix86_isa_flags) ? 2 : 1;
	}
    }

  return 0;
}
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
}
static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
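/* For example,

     struct s __attribute__((callee_pop_aggregate_return (0))) f (void);

   makes the caller responsible for popping the hidden return-slot
   pointer (attribute argument 0 yields "keep" == true above), while an
   argument of 1 leaves the pop to the callee.  */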
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static poly_int64
ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
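/* For example, an ia32 function

     void __attribute__((stdcall)) f (int a, int b);

   returns with "ret $8", popping its 8 bytes of stack arguments itself,
   whereas a cdecl function returns 0 here and leaves the cleanup to the
   caller.  */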
/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */

static bool
ix86_legitimate_combined_insn (rtx_insn *insn)
{
  int i;

  /* Check operand constraints in case hard registers were propagated
     into insn pattern.  This check prevents combine pass from
     generating insn patterns with invalid hard register operands.
     These invalid insns can eventually confuse reload to error out
     with a spill failure.  See also PRs 46829 and 46843.  */

  gcc_assert (INSN_CODE (insn) >= 0);

  extract_insn (insn);
  preprocess_constraints (insn);

  int n_operands = recog_data.n_operands;
  int n_alternatives = recog_data.n_alternatives;
  for (i = 0; i < n_operands; i++)
    {
      rtx op = recog_data.operand[i];
      machine_mode mode = GET_MODE (op);
      const operand_alternative *op_alt;
      int offset = 0;
      bool win;
      int j;

      /* A unary operator may be accepted by the predicate, but it
	 is irrelevant for matching constraints.  */
      if (UNARY_P (op))
	op = XEXP (op, 0);

      if (SUBREG_P (op))
	{
	  if (REG_P (SUBREG_REG (op))
	      && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
	    offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
					  GET_MODE (SUBREG_REG (op)),
					  SUBREG_BYTE (op),
					  GET_MODE (op));
	  op = SUBREG_REG (op);
	}

      if (!(REG_P (op) && HARD_REGISTER_P (op)))
	continue;

      op_alt = recog_op_alt;

      /* Operand has no constraints, anything is OK.  */
      win = !n_alternatives;

      alternative_mask preferred = get_preferred_alternatives (insn);
      for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
	{
	  if (!TEST_BIT (preferred, j))
	    continue;
	  if (op_alt[i].anything_ok
	      || (op_alt[i].matches != -1
		  && operands_match_p
		      (recog_data.operand[i],
		       recog_data.operand[op_alt[i].matches]))
	      || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
	    {
	      win = true;
	      break;
	    }
	}

      if (!win)
	return false;
    }

  return true;
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
ix86_asan_shadow_offset (void)
{
  return SUBTARGET_SHADOW_OFFSET;
}
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */

bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  enum calling_abi call_abi;
  const int *parm_regs;

  if (TARGET_SSE && SSE_REGNO_P (regno)
      && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
    return true;

  if (!TARGET_64BIT)
    return (regno < REGPARM_MAX
	    || (TARGET_MMX && MMX_REGNO_P (regno)
		&& regno < FIRST_MMX_REG + MMX_REGPARM_MAX));

  /* TODO: The function should depend on current function ABI but
     builtins.cc would need updating then.  Therefore we use the
     default ABI.  */
  call_abi = ix86_cfun_abi ();

  /* RAX is used as hidden argument to va_arg functions.  */
  if (call_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (call_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;

  for (i = 0; i < (call_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;

  return false;
}
/* Return true if we do not know how to pass ARG solely in registers.  */

static bool
ix86_must_pass_in_stack (const function_arg_info &arg)
{
  if (must_pass_in_stack_var_size_or_pad (arg))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && arg.mode == TImode
	  && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
}
/* It returns the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by fndecl dependent to the used
   abi format.  */
int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
/* We add this as a workaround in order to use libc_has_function
   hook in i386.md.  */
bool
ix86_libc_has_function (enum function_class fn_class)
{
  return targetm.libc_has_function (fn_class, NULL_TREE);
}
/* Returns value SYSV_ABI, MS_ABI dependent on fntype,
   specifying the call abi used.  */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  enum calling_abi abi = ix86_abi;

  if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
    return abi;

  if (abi == SYSV_ABI
      && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
    {
      static int warned;
      if (TARGET_X32 && !warned)
	{
	  error ("X32 does not support %<ms_abi%> attribute");
	  warned = 1;
	}

      abi = MS_ABI;
    }
  else if (abi == MS_ABI
	   && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
    abi = SYSV_ABI;

  return abi;
}

enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
}
/* Returns value SYSV_ABI, MS_ABI dependent on cfun,
   specifying the call abi used.  */
enum calling_abi
ix86_cfun_abi (void)
{
  return cfun ? cfun->machine->call_abi : ix86_abi;
}
bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "%<ms_hook_prologue%> attribute is not compatible "
		  "with nested function");
      else
	return true;
    }
  return false;
}

bool
ix86_function_naked (const_tree fn)
{
  if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
    return true;

  return false;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (cfun)
    cfun->machine->function_label_emitted = true;

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (out_file);
#endif

  assemble_function_label_raw (out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
		 out_file);
	}
      else
	{
	  /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp  */
	  fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", out_file);
	}
    }
}
/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  cfun->machine->call_abi = ix86_function_abi (fndecl);
}
/* Return 1 if pseudo register should be created and used to hold
   GOT address for PIC code.  */
bool
ix86_use_pseudo_pic_reg (void)
{
  if ((TARGET_64BIT
       && (ix86_cmodel == CM_SMALL_PIC
	   || TARGET_PECOFF))
      || !flag_pic)
    return false;
  return true;
}
/* Initialize large model PIC register.  */

static void
ix86_init_large_pic_reg (unsigned int tmp_regno)
{
  rtx_code_label *label;
  rtx tmp_reg;

  gcc_assert (Pmode == DImode);
  label = gen_label_rtx ();
  emit_label (label);
  LABEL_PRESERVE_P (label) = 1;
  tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
  gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
  emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
				label));
  emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
  emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
  const char *name = LABEL_NAME (label);
  PUT_CODE (label, NOTE);
  NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
  NOTE_DELETED_LABEL_NAME (label) = name;
}
/* Create and initialize PIC register if required.  */
static void
ix86_init_pic_reg (void)
{
  edge entry_edge;
  rtx_insn *seq;

  if (!ix86_use_pseudo_pic_reg ())
    return;

  start_sequence ();

  if (TARGET_64BIT)
    {
      if (ix86_cmodel == CM_LARGE_PIC)
	ix86_init_large_pic_reg (R11_REG);
      else
	emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
    }
  else
    {
      /* If there is future mcount call in the function it is more profitable
	 to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
      rtx reg = crtl->profile
		? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
		: pic_offset_table_rtx;
      rtx_insn *insn = emit_insn (gen_set_got (reg));
      RTX_FRAME_RELATED_P (insn) = 1;
      if (crtl->profile)
	emit_move_insn (pic_offset_table_rtx, reg);
      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
    }

  seq = get_insns ();
  end_sequence ();

  entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  insert_insn_on_edge (seq, entry_edge);
  commit_one_edge_insertion (entry_edge);
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl,
		      int caller)
{
  struct cgraph_node *local_info_node = NULL;
  struct cgraph_node *target = NULL;

  /* Set silent_p to false to raise an error for invalid calls when
     expanding function body.  */
  cfun->machine->silent_p = false;

  memset (cum, 0, sizeof (*cum));

  if (fndecl)
    {
      target = cgraph_node::get (fndecl);
      if (target)
	{
	  target = target->function_symbol ();
	  local_info_node = cgraph_node::local_info_node (target->decl);
	  cum->call_abi = ix86_function_abi (target->decl);
	}
      else
	cum->call_abi = ix86_function_abi (fndecl);
    }
  else
    cum->call_abi = ix86_function_type_abi (fntype);

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    {
      cum->nregs = (cum->call_abi == SYSV_ABI
		    ? X86_64_REGPARM_MAX
		    : X86_64_MS_REGPARM_MAX);
    }
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
	{
	  cum->sse_nregs = (cum->call_abi == SYSV_ABI
			    ? X86_64_SSE_REGPARM_MAX
			    : X86_64_MS_SSE_REGPARM_MAX);
	}
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx512f = true;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive.
     FIXME: once type system is fixed, we won't need this code anymore.  */
  if (local_info_node && local_info_node->local
      && local_info_node->can_change_signature)
    fntype = TREE_TYPE (target->decl);
  cum->stdarg = stdarg_p (fntype);
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);

  cum->warn_empty = !warn_abi || cum->stdarg;
  if (!cum->warn_empty && fntype)
    {
      function_args_iterator iter;
      tree argtype;
      bool seen_empty_type = false;
      FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
	{
	  if (argtype == error_mark_node || VOID_TYPE_P (argtype))
	    break;
	  if (TYPE_EMPTY_P (argtype))
	    seen_empty_type = true;
	  else if (seen_empty_type)
	    {
	      cum->warn_empty = true;
	      break;
	    }
	}
    }

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
	 in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
	{
	  cum->nregs = 0;
	  /* Since in 32-bit, variable arguments are always passed on
	     stack, there is scratch register available for indirect
	     sibcall.  */
	  cfun->machine->arg_reg_available = true;
	  cum->sse_nregs = 0;
	  cum->mmx_nregs = 0;
	  cum->warn_avx512f = false;
	  cum->warn_avx = false;
	  cum->warn_sse = false;
	  cum->warn_mmx = false;
	  return;
	}

      /* Use ecx and edx registers if function has fastcall attribute,
	 else look for regparm information.  */
      if (fntype)
	{
	  unsigned int ccvt = ix86_get_callcvt (fntype);
	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	    {
	      cum->nregs = 1;
	      cum->fastcall = 1; /* Same first register as in fastcall.  */
	    }
	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	    {
	      cum->nregs = 2;
	      cum->fastcall = 1;
	    }
	  else
	    cum->nregs = ix86_function_regparm (fntype, fndecl);
	}

      /* Set up the number of SSE registers used for passing SFmode
	 and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }

  cfun->machine->arg_reg_available = (cum->nregs > 0);
}
1915 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
1916 But in the case of vector types, it is some vector mode.
1918 When we have only some of our vector isa extensions enabled, then there
1919 are some modes for which vector_mode_supported_p is false. For these
1920 modes, the generic vector support in gcc will choose some non-vector mode
1921 in order to implement the type. By computing the natural mode, we'll
1922 select the proper ABI location for the operand and not depend on whatever
1923 the middle-end decides to do with these vector types.
1925 The midde-end can't deal with the vector types > 16 bytes. In this
1926 case, we return the original mode and warn ABI change if CUM isn't
1929 If INT_RETURN is true, warn ABI change if the vector mode isn't
1930 available for function return value. */
static machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
		   bool in_return)
{
  machine_mode mode = TYPE_MODE (type);

  if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32 || size == 64)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  /* There are no XFmode vector modes ...  */
	  if (innermode == XFmode)
	    return mode;

	  /* ... and no decimal float vector modes.  */
	  if (DECIMAL_FLOAT_MODE_P (innermode))
	    return mode;

	  if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type)))
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  FOR_EACH_MODE_FROM (mode, mode)
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      {
		if (size == 64 && (!TARGET_AVX512F || !TARGET_EVEX512)
		    && !TARGET_IAMCU)
		  {
		    static bool warnedavx512f;
		    static bool warnedavx512f_ret;

		    if (cum && cum->warn_avx512f && !warnedavx512f)
		      {
			if (warning (OPT_Wpsabi, "AVX512F vector argument "
				     "without AVX512F enabled changes the ABI"))
			  warnedavx512f = true;
		      }
		    else if (in_return && !warnedavx512f_ret)
		      {
			if (warning (OPT_Wpsabi, "AVX512F vector return "
				     "without AVX512F enabled changes the ABI"))
			  warnedavx512f_ret = true;
		      }

		    return TYPE_MODE (type);
		  }
		else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
		  {
		    static bool warnedavx;
		    static bool warnedavx_ret;

		    if (cum && cum->warn_avx && !warnedavx)
		      {
			if (warning (OPT_Wpsabi, "AVX vector argument "
				     "without AVX enabled changes the ABI"))
			  warnedavx = true;
		      }
		    else if (in_return && !warnedavx_ret)
		      {
			if (warning (OPT_Wpsabi, "AVX vector return "
				     "without AVX enabled changes the ABI"))
			  warnedavx_ret = true;
		      }

		    return TYPE_MODE (type);
		  }
		else if (((size == 8 && TARGET_64BIT) || size == 16)
			 && !TARGET_SSE
			 && !TARGET_IAMCU)
		  {
		    static bool warnedsse;
		    static bool warnedsse_ret;

		    if (cum && cum->warn_sse && !warnedsse)
		      {
			if (warning (OPT_Wpsabi, "SSE vector argument "
				     "without SSE enabled changes the ABI"))
			  warnedsse = true;
		      }
		    else if (!TARGET_64BIT && in_return && !warnedsse_ret)
		      {
			if (warning (OPT_Wpsabi, "SSE vector return "
				     "without SSE enabled changes the ABI"))
			  warnedsse_ret = true;
		      }
		  }
		else if ((size == 8 && !TARGET_64BIT)
			 && (!cfun
			     || cfun->machine->func_type == TYPE_NORMAL)
			 && !TARGET_MMX
			 && !TARGET_IAMCU)
		  {
		    static bool warnedmmx;
		    static bool warnedmmx_ret;

		    if (cum && cum->warn_mmx && !warnedmmx)
		      {
			if (warning (OPT_Wpsabi, "MMX vector argument "
				     "without MMX enabled changes the ABI"))
			  warnedmmx = true;
		      }
		    else if (in_return && !warnedmmx_ret)
		      {
			if (warning (OPT_Wpsabi, "MMX vector return "
				     "without MMX enabled changes the ABI"))
			  warnedmmx_ret = true;
		      }
		  }
		return mode;
	      }

	  gcc_unreachable ();
	}
    }

  return mode;
}
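/* Editorial illustration (not from the GCC sources): for a user vector
   type such as

     typedef int v4si __attribute__ ((vector_size (16)));

   TYPE_MODE is V4SImode when SSE is enabled, so the natural mode equals
   the type mode.  With SSE disabled the middle-end lowers the type to a
   non-vector mode, but type_natural_mode still computes V4SImode so the
   psABI argument slots stay stable; the -Wpsabi warnings above fire in
   that case because the value really is passed differently.  */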
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS
       && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS))
      || (class2 == X86_64_INTEGERSI_CLASS
	  && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS)))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
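/* Editorial illustration (not from the GCC sources): for

     struct s { double d; long l; };

   the first eightbyte classifies as SSEDF and the second as INTEGER, so
   the struct travels in one SSE and one integer register.  For

     union u { long double ld; int i; };

   the X87/X87UP classes of the long double participate, and the object
   ends up in MEMORY via the post-merge cleanup below (X87UP not preceded
   by X87) -- the case the GCC 4.4 -Wpsabi note refers to.  */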
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 512 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (machine_mode mode, const_tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
		   int &zero_width_bitfields)
{
  HOST_WIDE_INT bytes
    = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode)
    {
      /* The value of "named" doesn't matter.  */
      function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
      if (targetm.calls.must_pass_in_stack (arg))
	return 0;
    }

  if (type && (AGGREGATE_TYPE_P (type)
	       || (TREE_CODE (type) == BITINT_TYPE && words > 1)))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 64 bytes on the stack.  */
      if (bytes > 64)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal memory class, so handle it as a special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}
      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      if (integer_zerop (DECL_SIZE (field)))
			{
			  if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
			    continue;
			  if (zero_width_bitfields != 2)
			    {
			      zero_width_bitfields = 1;
			      continue;
			    }
			}
		      for (i = (int_bit_position (field)
				+ (bit_offset % 64)) / 8 / 8;
			   i < ((int_bit_position (field) + (bit_offset % 64))
				+ tree_to_shwi (DECL_SIZE (field))
				+ 63) / 8 / 8; i++)
			classes[i]
			  = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
		    }
		  else
		    {
		      int pos;

		      type = TREE_TYPE (field);

		      /* Flexible array member is ignored.  */
		      if (TYPE_MODE (type) == BLKmode
			  && TREE_CODE (type) == ARRAY_TYPE
			  && TYPE_SIZE (type) == NULL_TREE
			  && TYPE_DOMAIN (type) != NULL_TREE
			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
			      == NULL_TREE))
			{
			  static bool warned;

			  if (!warned && warn_psabi)
			    {
			      warned = true;
			      inform (input_location,
				      "the ABI of passing struct with"
				      " a flexible array member has"
				      " changed in GCC 4.4");
			    }
			  continue;
			}
		      num = classify_argument (TYPE_MODE (type), type,
					       subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 512,
					       zero_width_bitfields);
		      if (!num)
			return 0;
		      pos = (int_bit_position (field)
			     + (bit_offset % 64)) / 8 / 8;
		      for (i = 0; i < num && (i + pos) < words; i++)
			classes[i + pos]
			  = merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  break;
	case ARRAY_TYPE:
	  /* Arrays are handled as small records.  */
	  {
	    int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), subclasses, bit_offset,
				     zero_width_bitfields);
	    if (!num)
	      return 0;

	    /* The partial classes are now full classes.  */
	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
		&& !((bit_offset % 64) == 0 && bytes == 4))
	      subclasses[0] = X86_64_INTEGER_CLASS;

	    for (i = 0; i < words; i++)
	      classes[i] = subclasses[i % num];

	    break;
	  }
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but offset is always 0.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset, zero_width_bitfields);
		  if (!num)
		    return 0;
		  for (i = 0; i < num && i < words; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	  break;

	case BITINT_TYPE:
	  /* _BitInt(N) for N > 64 is passed as structure containing
	     (N + 63) / 64 64-bit elements.  */
	  if (words > 2)
	    return 0;
	  classes[0] = classes[1] = X86_64_INTEGER_CLASS;
	  return words;

	default:
	  gcc_unreachable ();
	}
      if (words > 2)
	{
	  /* When size > 16 bytes, if the first one isn't
	     X86_64_SSE_CLASS or any other ones aren't
	     X86_64_SSEUP_CLASS, everything should be passed in
	     memory.  */
	  if (classes[0] != X86_64_SSE_CLASS)
	    return 0;

	  for (i = 1; i < words; i++)
	    if (classes[i] != X86_64_SSEUP_CLASS)
	      return 0;
	}

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && classes[i - 1] != X86_64_SSE_CLASS
	      && classes[i - 1] != X86_64_SSEUP_CLASS)
	    {
	      /* The first one should never be X86_64_SSEUP_CLASS.  */
	      gcc_assert (i != 0);
	      classes[i] = X86_64_SSE_CLASS;
	    }

	  /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
	     everything should be passed in memory.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (classes[i - 1] != X86_64_X87_CLASS))
	    {
	      static bool warned;

	      /* The first one should never be X86_64_X87UP_CLASS.  */
	      gcc_assert (i != 0);
	      if (!warned && warn_psabi)
		{
		  warned = true;
		  inform (input_location,
			  "the ABI of passing union with %<long double%>"
			  " has changed in GCC 4.4");
		}
	      return 0;
	    }
	}
      return words;
    }
  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
      && GET_MODE_UNIT_SIZE (mode) == bytes)
    mode = GET_MODE_INNER (mode);
  /* Classification of atomic types.  */
  switch (mode)
    {
    case E_SDmode:
    case E_DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;

    case E_TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;

    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
    case E_CSImode:
    case E_CHImode:
    case E_CQImode:
      {
	int size = bit_offset + (int) GET_MODE_BITSIZE (mode);

	/* Analyze last 128 bits only.  */
	size = (size - 1) & 0x7f;

	if (size < 32)
	  {
	    classes[0] = X86_64_INTEGERSI_CLASS;
	    return 1;
	  }
	else if (size < 64)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    return 1;
	  }
	else if (size < 64+32)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    classes[1] = X86_64_INTEGERSI_CLASS;
	    return 2;
	  }
	else if (size < 64+64)
	  {
	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
	    return 2;
	  }
	else
	  gcc_unreachable ();
      }

    case E_CDImode:
    case E_TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;

    case E_COImode:
    case E_OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_CTImode:
      return 0;

    case E_HFmode:
    case E_BFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSEHF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;

    case E_SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;

    case E_DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;

    case E_XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;

    case E_TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;

    case E_HCmode:
    case E_BCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
	return 1;
      else
	{
	  classes[1] = X86_64_SSEHF_CLASS;
	  return 2;
	}

    case E_SCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
	return 1;
      else
	{
	  static bool warned;

	  if (!warned && warn_psabi)
	    {
	      warned = true;
	      inform (input_location,
		      "the ABI of passing structure with %<complex float%>"
		      " member has changed in GCC 4.4");
	    }
	  classes[1] = X86_64_SSESF_CLASS;
	  return 2;
	}

    case E_DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;

    case E_XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;

    case E_TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;

    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HFmode:
    case E_V16BFmode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      return 4;

    case E_V8DFmode:
    case E_V16SFmode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V8DImode:
    case E_V16SImode:
    case E_V32HImode:
    case E_V64QImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      classes[4] = X86_64_SSEUP_CLASS;
      classes[5] = X86_64_SSEUP_CLASS;
      classes[6] = X86_64_SSEUP_CLASS;
      classes[7] = X86_64_SSEUP_CLASS;
      return 8;

    case E_V4SFmode:
    case E_V4SImode:
    case E_V16QImode:
    case E_V8HImode:
    case E_V8HFmode:
    case E_V8BFmode:
    case E_V2DFmode:
    case E_V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;

    case E_V1TImode:
    case E_V1DImode:
    case E_V2SFmode:
    case E_V2SImode:
    case E_V4HImode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;

    case E_BLKmode:
    case E_VOIDmode:
      return 0;

    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
	return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
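/* Editorial illustration (not from the GCC sources): a few
   representative classifications on x86-64:

     long                        -> { INTEGER },          returns 1
     struct { double d; long l; } -> { SSEDF, INTEGER },  returns 2
     __int128                    -> { INTEGER, INTEGER }, returns 2
     struct { char c[24]; }      -> returns 0 (memory: more than two
                                    words and not an all-SSE object)

   A zero sized struct returns 1 with classes[0] == X86_64_NO_CLASS.  */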
/* Wrapper around classify_argument with the extra zero_width_bitfields
   argument, to diagnose GCC 12.1 ABI differences for C.  */

static int
classify_argument (machine_mode mode, const_tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  int zero_width_bitfields = 0;
  static bool warned = false;
  int n = classify_argument (mode, type, classes, bit_offset,
			     zero_width_bitfields);
  if (!zero_width_bitfields || warned || !warn_psabi)
    return n;
  enum x86_64_reg_class alt_classes[MAX_CLASSES];
  zero_width_bitfields = 2;
  if (classify_argument (mode, type, alt_classes, bit_offset,
			 zero_width_bitfields) != n)
    zero_width_bitfields = 3;
  else
    for (int i = 0; i < n; i++)
      if (classes[i] != alt_classes[i])
	{
	  zero_width_bitfields = 3;
	  break;
	}
  if (zero_width_bitfields == 3)
    {
      warned = true;
      const char *url
	= CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";

      inform (input_location,
	      "the ABI of passing C structures with zero-width bit-fields"
	      " has changed in GCC %{12.1%}", url);
    }
  return n;
}
/* Examine the argument and set the number of registers required in each
   class.  Return true iff the parameter should be passed in memory.  */

static bool
examine_argument (machine_mode mode, const_tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;

  if (!n)
    return true;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSEHF_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	if (!in_return)
	  return true;
	break;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }

  return false;
}
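/* Editorial illustration (not from the GCC sources): for
   struct s { double d; long l; } this yields *int_nregs == 1 and
   *sse_nregs == 1 and returns false, so the struct travels in registers
   if enough of each kind remain; for a 32-byte struct of chars it
   returns true and the value must live in memory.  */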
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */

static rtx
construct_container (machine_mode mode, machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  machine_mode tmpmode;
  int bytes
    = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (examine_argument (mode, type, in_return, &needed_intregs,
			&needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs
      && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
    {
      /* Return early if we shouldn't raise an error for invalid
	 calls.  */
      if (cfun != NULL && cfun->machine->silent_p)
	return NULL;
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      if (VALID_SSE2_TYPE_MODE (mode))
		error ("SSE register return with SSE2 disabled");
	      else
		error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  if (VALID_SSE2_TYPE_MODE (mode))
	    error ("SSE register argument with SSE2 disabled");
	  else
	    error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  /* Return early if we shouldn't raise an error for invalid
	     calls.  */
	  if (cfun != NULL && cfun->machine->silent_p)
	    return NULL;
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode && mode != HCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSEHF_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	if (mode != BLKmode)
	  return gen_reg_or_parallel (mode, orig_mode,
				      GET_SSE_REGNO (sse_regno));
	break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  if (n == 8
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && regclass[4] == X86_64_SSEUP_CLASS
      && regclass[5] == X86_64_SSEUP_CLASS
      && regclass[6] == X86_64_SSEUP_CLASS
      && regclass[7] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS
      && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);

  if (n == 2
      && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == BLKmode)
      && intreg[0] + 1 == intreg[1])
    {
      if (mode == BLKmode)
	{
	  /* Use TImode for BLKmode values in 2 integer registers.  */
	  exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (TImode, intreg[0]),
				      GEN_INT (0));
	  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
	  XVECEXP (ret, 0, 0) = exp[0];
	  return ret;
	}
      else
	return gen_rtx_REG (mode, intreg[0]);
    }

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    {
	      unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
	      if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
		/* We've requested 24 bytes we
		   don't have mode for.  Use DImode.  */
		tmpmode = DImode;
	    }
	  else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode, *intreg),
				 GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSEHF_CLASS:
	  tmpmode = (mode == BFmode ? BFmode : HFmode);
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (SFmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (DFmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  pos = i;
	  switch (n)
	    {
	    case 1:
	      tmpmode = DImode;
	      break;
	    case 2:
	      if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
		{
		  tmpmode = TImode;
		  i++;
		}
	      else
		tmpmode = DImode;
	      break;
	    case 4:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS);
	      tmpmode = OImode;
	      i += 3;
	      break;
	    case 8:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS
			  && regclass[4] == X86_64_SSEUP_CLASS
			  && regclass[5] == X86_64_SSEUP_CLASS
			  && regclass[6] == X86_64_SSEUP_CLASS
			  && regclass[7] == X86_64_SSEUP_CLASS);
	      tmpmode = XImode;
	      i += 7;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (pos*8));
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp[i];
  return ret;
}
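/* Editorial sketch (not from the GCC sources): for
   struct s { double d; long l; } passed as the first argument, the
   PARALLEL built above looks roughly like

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
		(expr_list (reg:DI di) (const_int 8))])

   i.e. one EXPR_LIST per eightbyte, pairing a hard register with the
   byte offset of the piece it carries.  */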
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)

   Return the number of integer registers advanced over.  */

static int
function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  int res = 0;
  bool error_p = false;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
	goto pass_in_reg;
      return res;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
pass_in_reg:
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;
      if (cum->nregs >= 0)
	res = words;
      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cfun->machine->arg_reg_available = false;
	  cum->regno = 0;
	}
      break;

    case E_OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_DFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case E_SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case E_V16HFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_V32HFmode:
    case E_V16SFmode:
    case E_V16SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V8DFmode:
    case E_V8DImode:
    case E_TImode:
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V8HFmode:
    case E_V4SFmode:
    case E_V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case E_V8QImode:
    case E_V4HImode:
    case E_V4HFmode:
    case E_V2SImode:
    case E_V2SFmode:
    case E_V1TImode:
    case E_V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
  if (error_p)
    {
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
	     "SSE/SSE2 enabled", cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
	     "attribute used to function called");
    }

  return res;
}
static int
function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
			 const_tree type, HOST_WIDE_INT words, bool named)
{
  int int_nregs, sse_nregs;

  /* Unnamed 512 and 256bit vector mode parameters are passed on stack.  */
  if (!named && (VALID_AVX512F_REG_MODE (mode)
		 || VALID_AVX256_REG_MODE (mode)))
    return 0;

  if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    {
      cum->nregs -= int_nregs;
      cum->sse_nregs -= sse_nregs;
      cum->regno += int_nregs;
      cum->sse_regno += sse_nregs;
      return int_nregs;
    }
  else
    {
      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
      cum->words = ROUND_UP (cum->words, align);
      cum->words += words;
      return 0;
    }
}
static int
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
			    HOST_WIDE_INT words)
{
  /* Otherwise, this should be passed indirect.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;
  if (cum->nregs > 0)
    {
      cum->nregs -= 1;
      cum->regno += 1;
      return 1;
    }
  return 0;
}
/* Update the data in CUM to advance over argument ARG.  */

static void
ix86_function_arg_advance (cumulative_args_t cum_v,
			   const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  machine_mode mode = arg.mode;
  HOST_WIDE_INT bytes, words;
  int nregs;

  /* The argument of interrupt handler is a special case and is
     handled in ix86_function_arg.  */
  if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
    return;

  bytes = arg.promoted_size_in_bytes ();
  words = CEIL (bytes, UNITS_PER_WORD);

  if (arg.type)
    mode = type_natural_mode (arg.type, NULL, false);

  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      if (call_abi == MS_ABI)
	nregs = function_arg_advance_ms_64 (cum, bytes, words);
      else
	nregs = function_arg_advance_64 (cum, mode, arg.type, words,
					 arg.named);
    }
  else
    nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);

  if (!nregs)
    {
      /* Track if there are outgoing arguments on stack.  */
      if (cum->caller)
	cfun->machine->outgoing_args_on_stack = true;
    }
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */

static rtx
function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
		 machine_mode orig_mode, const_tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  bool error_p = false;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
	goto pass_in_reg;
      return NULL_RTX;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
pass_in_reg:
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if it isn't an
	     aggregate type.  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode
		  || mode == DImode
		  || (type && AGGREGATE_TYPE_P (type)))
		break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == AX_REG)
		regno = CX_REG;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case E_DFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case E_SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case E_TImode:
      /* In 32bit, we pass TImode in xmm registers.  */
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V8HFmode:
    case E_V4SFmode:
    case E_V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case E_OImode:
    case E_XImode:
      /* OImode and XImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_V64QImode:
    case E_V32HImode:
    case E_V16SImode:
    case E_V8DImode:
    case E_V32HFmode:
    case E_V16SFmode:
    case E_V8DFmode:
      /* FALLTHRU */

    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V16HFmode:
    case E_V4DFmode:
    case E_V4DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case E_V8QImode:
    case E_V4HImode:
    case E_V4HFmode:
    case E_V2SImode:
    case E_V2SFmode:
    case E_V1TImode:
    case E_V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }
  if (error_p)
    {
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
	     "SSE/SSE2 enabled", cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
	     "attribute used to function called");
    }

  return NULL_RTX;
}
static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
		 machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
		    ? (cum->sse_nregs < 0
		       ? X86_64_SSE_REGPARM_MAX
		       : cum->sse_regno)
		    : -1);

  switch (mode)
    {
    default:
      break;

    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_V16SFmode:
    case E_V16SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V8DFmode:
    case E_V8DImode:
      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
      if (!named)
	return NULL;
      break;
    }

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
			      cum->sse_nregs,
			      &x86_64_int_parameter_registers[cum->regno],
			      cum->sse_regno);
}
static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
		    machine_mode orig_mode, bool named, const_tree type,
		    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes are passed in anything but integer regs.  */
  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
    {
      if (named)
	{
	  if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
	    regno = cum->regno + FIRST_SSE_REG;
	}
      else
	{
	  rtx t1, t2;

	  /* Unnamed floating parameters are passed in both the
	     SSE and integer registers.  */
	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
	  t2 = gen_rtx_REG (mode, regno);
	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
	}
    }
  /* Handle aggregated types passed in register.  */
  if (orig_mode == BLKmode)
    {
      if (bytes > 0 && bytes <= 8)
	mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
	mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

   ARG describes the argument while CUM gives information about the
   preceding args and about the function being called.  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  machine_mode mode = arg.mode;
  HOST_WIDE_INT bytes, words;
  rtx reg;

  if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
    {
      gcc_assert (arg.type != NULL_TREE);
      if (POINTER_TYPE_P (arg.type))
	{
	  /* This is the pointer argument.  */
	  gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
	  /* It is at -WORD(AP) in the current frame in interrupt and
	     exception handlers.  */
	  reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
	}
      else
	{
	  gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
		      && TREE_CODE (arg.type) == INTEGER_TYPE
		      && TYPE_MODE (arg.type) == word_mode);
	  /* The error code is the word-mode integer argument at
	     -2 * WORD(AP) in the current frame of the exception
	     handler.  */
	  reg = gen_rtx_MEM (word_mode,
			     plus_constant (Pmode,
					    arg_pointer_rtx,
					    -2 * UNITS_PER_WORD));
	}
      return reg;
    }

  bytes = arg.promoted_size_in_bytes ();
  words = CEIL (bytes, UNITS_PER_WORD);

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (arg.type && VECTOR_TYPE_P (arg.type))
    mode = type_natural_mode (arg.type, cum, false);

  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      if (call_abi == MS_ABI)
	reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
				  arg.type, bytes);
      else
	reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
    }
  else
    reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);

  /* Track if there are outgoing arguments on stack.  */
  if (reg == NULL_RTX && cum->caller)
    cfun->machine->outgoing_args_on_stack = true;

  return reg;
}
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      /* See Windows x64 Software Convention.  */
      if (call_abi == MS_ABI)
	{
	  HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);

	  if (tree type = arg.type)
	    {
	      /* Arrays are passed by reference.  */
	      if (TREE_CODE (type) == ARRAY_TYPE)
		return true;

	      if (RECORD_OR_UNION_TYPE_P (type))
		{
		  /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
		     are passed by reference.  */
		  msize = int_size_in_bytes (type);
		}
	    }

	  /* __m128 is passed by reference.  */
	  return msize != 1 && msize != 2 && msize != 4 && msize != 8;
	}
      else if (arg.type && int_size_in_bytes (arg.type) == -1)
	return true;
    }

  return false;
}
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  XXX: This function is obsolete and is only used for
   checking psABI compatibility with previous versions of GCC.  */

static bool
ix86_compat_aligned_value_p (const_tree type)
{
  machine_mode mode = TYPE_MODE (type);
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
       || mode == TDmode
       || mode == TFmode
       || mode == TCmode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages pass arrays by value.  */
	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}
/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
   XXX: This function is obsolete and is only used for checking psABI
   compatibility with previous versions of GCC.  */

static unsigned int
ix86_compat_function_arg_boundary (machine_mode mode,
				   const_tree type, unsigned int align)
{
  /* In 32bit, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!type)
	{
	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!ix86_compat_aligned_value_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > BIGGEST_ALIGNMENT)
    align = BIGGEST_ALIGNMENT;
  return align;
}
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  */

static bool
ix86_contains_aligned_value_p (const_tree type)
{
  machine_mode mode = TYPE_MODE (type);

  if (mode == XFmode || mode == XCmode)
    return false;

  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type);
		 field;
		 field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages pass arrays by value.  */
	  if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    return TYPE_ALIGN (type) >= 128;

  return false;
}
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

static unsigned int
ix86_function_arg_boundary (machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
	 the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
      if (TYPE_EMPTY_P (type))
	return PARM_BOUNDARY;
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      static bool warned;
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
	{
	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
	  if (!type)
	    {
	      if (mode == XFmode || mode == XCmode)
		align = PARM_BOUNDARY;
	    }
	  else if (!ix86_contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;

	  if (align < 128)
	    align = PARM_BOUNDARY;
	}

      if (warn_psabi
	  && !warned
	  && align != ix86_compat_function_arg_boundary (mode, type,
							 saved_align))
	{
	  warned = true;
	  inform (input_location,
		  "the ABI for passing parameters with %d-byte"
		  " alignment has changed in GCC 4.6",
		  align / BITS_PER_UNIT);
	}
    }

  return align;
}
/* Return true if N is a possible register number of function value.  */

static bool
ix86_function_value_regno_p (const unsigned int regno)
{
  switch (regno)
    {
    case AX_REG:
      return true;
    case DX_REG:
      return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
    case DI_REG:
    case SI_REG:
      return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;

      /* Complex values are returned in %st(0)/%st(1) pair.  */
    case ST0_REG:
    case ST1_REG:
      /* TODO: The function should depend on current function ABI but
	 builtins.cc would need updating then.  Therefore we use the
	 default ABI.  */
      if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
	return false;
      return TARGET_FLOAT_RETURNS_IN_80387;

      /* Complex values are returned in %xmm0/%xmm1 pair.  */
    case XMM0_REG:
    case XMM1_REG:
      return TARGET_SSE;

    case MM0_REG:
      if (TARGET_MACHO || TARGET_64BIT)
	return false;
      return TARGET_MMX;
    }

  return false;
}
/* Check whether the register REGNO should be zeroed on X86.
   When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
   together, no need to zero it again.
   When NEED_ZERO_MMX is true, MMX registers should be cleared.  */

static bool
zero_call_used_regno_p (const unsigned int regno,
			bool all_sse_zeroed,
			bool need_zero_mmx)
{
  return GENERAL_REGNO_P (regno)
	 || (!all_sse_zeroed && SSE_REGNO_P (regno))
	 || MASK_REGNO_P (regno)
	 || (need_zero_mmx && MMX_REGNO_P (regno));
}
/* Return the machine_mode that is used to zero register REGNO.  */

static machine_mode
zero_call_used_regno_mode (const unsigned int regno)
{
  /* NB: We only need to zero the lower 32 bits for integer registers
     and the lower 128 bits for vector registers since destinations are
     zero-extended to the full register width.  */
  if (GENERAL_REGNO_P (regno))
    return SImode;
  else if (SSE_REGNO_P (regno))
    return V4SFmode;
  else if (MASK_REGNO_P (regno))
    return HImode;
  else if (MMX_REGNO_P (regno))
    return V2SImode;
  else
    gcc_unreachable ();
}
/* Generate a rtx to zero all vector registers together if possible,
   otherwise, return NULL.  */

static rtx
zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
{
  if (!TARGET_AVX)
    return NULL;

  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if ((LEGACY_SSE_REGNO_P (regno)
	 || (TARGET_64BIT
	     && (REX_SSE_REGNO_P (regno)
		 || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
	&& !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
      return NULL;

  return gen_avx_vzeroall ();
}
/* Generate insns to zero all st registers together.
   Return the number of zeroed st registers.
   Assuming the number of st registers that are zeroed is num_of_st,
   we emit the following sequence to zero them together:

	     fldz;
	     fldz;
	     ...
	     fldz;
	     fstp %st(0);
	     ...
	     fstp %st(0);

   i.e., num_of_st fldz followed by num_of_st fstp to clear the stack and
   mark the stack slots empty.

   How to compute num_of_st:
   There is no direct mapping from stack registers to hard register
   numbers.  If one stack register needs to be cleared, we don't know
   where in the stack the value remains.  So, if any stack register
   needs to be cleared, the whole stack should be cleared.  However,
   x87 stack registers that hold the return value should be excluded.
   x87 returns in the top (two for complex values) register, so
   num_of_st should be 7/6 when x87 returns, otherwise it will be 8.
   Return the value of num_of_st.  */

static int
zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
{
  /* If the FPU is disabled, no need to zero all st registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    return 0;

  unsigned int num_of_st = 0;
  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
	&& TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
      {
	num_of_st++;
	break;
      }

  if (num_of_st == 0)
    return 0;

  bool return_with_x87 = false;
  return_with_x87 = (crtl->return_rtx
		     && (STACK_REG_P (crtl->return_rtx)));

  bool complex_return = false;
  complex_return = (crtl->return_rtx
		    && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));

  if (return_with_x87)
    if (complex_return)
      num_of_st = 6;
    else
      num_of_st = 7;
  else
    num_of_st = 8;

  rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
  for (unsigned int i = 0; i < num_of_st; i++)
    emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));

  for (unsigned int i = 0; i < num_of_st; i++)
    {
      rtx insn;
      insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
      add_reg_note (insn, REG_DEAD, st_reg);
    }
  return num_of_st;
}
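/* Editorial illustration (not from the GCC sources): for a function
   returning double in %st(0) under -mfpmath=387 with
   -fzero-call-used-regs=all, num_of_st above is 7, so the epilogue
   gains seven fldz insns followed by seven "fstp %st(0)", leaving the
   return value on top and the remaining slots empty and zeroed.  */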
/* When the routine exits in MMX mode, if any ST register needs
   to be zeroed, we should clear all MMX registers except the
   RET_MMX_REGNO that holds the return value.  */
static bool
zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
		       unsigned int ret_mmx_regno)
{
  bool need_zero_all_mm = false;
  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (STACK_REGNO_P (regno)
	&& TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
      {
	need_zero_all_mm = true;
	break;
      }

  if (!need_zero_all_mm)
    return false;

  machine_mode mode = V2SImode;
  for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
    if (regno != ret_mmx_regno)
      {
	rtx reg = gen_rtx_REG (mode, regno);
	emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
      }
  return true;
}
/* TARGET_ZERO_CALL_USED_REGS.  */
/* Generate a sequence of instructions that zero registers specified by
   NEED_ZEROED_HARDREGS.  Return the ZEROED_HARDREGS that are actually
   zeroed.  */
static HARD_REG_SET
ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
{
  HARD_REG_SET zeroed_hardregs;
  bool all_sse_zeroed = false;
  int all_st_zeroed_num = 0;
  bool all_mm_zeroed = false;

  CLEAR_HARD_REG_SET (zeroed_hardregs);

  /* first, let's see whether we can zero all vector registers together.  */
  rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
  if (zero_all_vec_insn)
    {
      emit_insn (zero_all_vec_insn);
      all_sse_zeroed = true;
    }

  /* mm/st registers are shared registers set, we should follow the following
     rules to clear them:

		     MMX exit mode	   x87 exit mode
     -------------|----------------------|---------------
     uses x87 reg | clear all MMX	  | clear all x87
     uses MMX reg | clear individual MMX | clear all x87
     x87 + MMX	  | clear all MMX	  | clear all x87

     first, we should decide which mode (MMX mode or x87 mode) the function
     exits with.  */
  bool exit_with_mmx_mode = (crtl->return_rtx
			     && (MMX_REG_P (crtl->return_rtx)));

  if (!exit_with_mmx_mode)
    /* x87 exit mode, we should zero all st registers together.  */
    {
      all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs);

      if (all_st_zeroed_num > 0)
	for (unsigned int regno = FIRST_STACK_REG;
	     regno <= LAST_STACK_REG; regno++)
	  /* x87 stack registers that hold the return value should be excluded.
	     x87 returns in the top (two for complex values) register.  */
	  if (all_st_zeroed_num == 8
	      || !((all_st_zeroed_num >= 6
		    && regno == REGNO (crtl->return_rtx))
		   || (all_st_zeroed_num == 6
		       && (regno == (REGNO (crtl->return_rtx) + 1)))))
	    SET_HARD_REG_BIT (zeroed_hardregs, regno);
    }
  else
    /* MMX exit mode, check whether we can zero all mm registers.  */
    {
      unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
      all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
					     exit_mmx_regno);
      if (all_mm_zeroed)
	for (unsigned int regno = FIRST_MMX_REG;
	     regno <= LAST_MMX_REG; regno++)
	  if (regno != exit_mmx_regno)
	    SET_HARD_REG_BIT (zeroed_hardregs, regno);
    }

  /* Now, generate instructions to zero all the other registers.  */

  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    {
      if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
	continue;
      if (!zero_call_used_regno_p (regno, all_sse_zeroed,
				   exit_with_mmx_mode && !all_mm_zeroed))
	continue;

      SET_HARD_REG_BIT (zeroed_hardregs, regno);

      machine_mode mode = zero_call_used_regno_mode (regno);

      rtx reg = gen_rtx_REG (mode, regno);
      rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));

      switch (mode)
	{
	case E_SImode:
	  if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
	    {
	      rtx clob = gen_rtx_CLOBBER (VOIDmode,
					  gen_rtx_REG (CCmode,
						       FLAGS_REG));
	      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
							   tmp,
							   clob));
	    }
	  /* FALLTHRU.  */
	case E_V4SFmode:
	case E_HImode:
	case E_V2SImode:
	  emit_insn (tmp);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return zeroed_hardregs;
}
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
function_value_32 (machine_mode orig_mode, machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* 64-byte vector modes in %zmm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    {
      /* Most things go in %eax.  */
      regno = AX_REG;

      /* Return __bf16/_Float16/_Complex _Float16 by sse register.  */
      if (mode == HFmode || mode == BFmode)
	{
	  if (!TARGET_SSE2)
	    {
	      error ("SSE register return with SSE2 disabled");
	      regno = AX_REG;
	    }
	  else
	    regno = FIRST_SSE_REG;
	}

      if (mode == HCmode)
	{
	  if (!TARGET_SSE2)
	    error ("SSE register return with SSE2 disabled");

	  rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
	  XVECEXP (ret, 0, 0)
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (SImode,
					      TARGET_SSE2 ? FIRST_SSE_REG
							  : AX_REG),
				 GEN_INT (0));
	  return ret;
	}

      /* Override FP return register with %xmm0 for local functions when
	 SSE math is enabled or for functions with sseregparm attribute.  */
      if ((fn || fntype) && (mode == SFmode || mode == DFmode))
	{
	  int sse_level = ix86_function_sseregparm (fntype, fn, false);
	  if (sse_level == -1)
	    {
	      error ("calling %qD with SSE calling convention without "
		     "SSE/SSE2 enabled", fn);
	      sorry ("this is a GCC bug that can be worked around by adding "
		     "attribute used to function called");
	    }
	  else if ((sse_level >= 1 && mode == SFmode)
		   || (sse_level == 2 && mode == DFmode))
	    regno = FIRST_SSE_REG;
	}
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
static rtx
function_value_64 (machine_mode orig_mode, machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
	{
	case E_BFmode:
	case E_HFmode:
	case E_HCmode:
	case E_SFmode:
	case E_SCmode:
	case E_DFmode:
	case E_DCmode:
	case E_TFmode:
	case E_SDmode:
	case E_DDmode:
	case E_TDmode:
	  regno = FIRST_SSE_REG;
	  break;
	case E_XFmode:
	case E_XCmode:
	  regno = FIRST_FLOAT_REG;
	  break;
	case E_TCmode:
	  return NULL;
	default:
	  regno = AX_REG;
	}

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, valtype, 1,
			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
			     x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
static rtx
function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
		      const_tree fntype, const_tree fn, const_tree valtype)
{
  unsigned int regno;

  /* Floating point return values in %st(0)
     (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes).  */
  if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
      && (GET_MODE_SIZE (mode) > 8
	  || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
    {
      regno = FIRST_FLOAT_REG;
      return gen_rtx_REG (orig_mode, regno);
    }
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}
static rtx
function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
		      const_tree valtype)
{
  unsigned int regno = AX_REG;

  if (TARGET_SSE)
    {
      switch (GET_MODE_SIZE (mode))
	{
	case 16:
	  if (valtype != NULL_TREE
	      && !VECTOR_INTEGER_TYPE_P (valtype)
	      && !INTEGRAL_TYPE_P (valtype)
	      && !VECTOR_FLOAT_TYPE_P (valtype))
	    break;
	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode))
	    regno = FIRST_SSE_REG;
	  break;
	case 8:
	case 4:
	  if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
	    break;
	  if (mode == SFmode || mode == DFmode)
	    regno = FIRST_SSE_REG;
	  break;
	default:
	  break;
	}
    }
  return gen_rtx_REG (orig_mode, regno);
}
static rtx
ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
		       machine_mode orig_mode, machine_mode mode)
{
  const_tree fn, fntype;

  fn = NULL_TREE;
  if (fntype_or_decl && DECL_P (fntype_or_decl))
    fn = fntype_or_decl;
  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;

  if (ix86_function_type_abi (fntype) == MS_ABI)
    {
      if (TARGET_64BIT)
	return function_value_ms_64 (orig_mode, mode, valtype);
      else
	return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
    }
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}
static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
{
  machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype, NULL, true);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
/* Pointer function arguments and return values are promoted to
   word_mode for normal functions.  */

static machine_mode
ix86_promote_function_mode (const_tree type, machine_mode mode,
			    int *punsignedp, const_tree fntype,
			    int for_return)
{
  if (cfun->machine->func_type == TYPE_NORMAL
      && type != NULL_TREE
      && POINTER_TYPE_P (type))
    {
      *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return word_mode;
    }
  return default_promote_function_mode (type, mode, punsignedp, fntype,
					for_return);
}
4301 should be accessed using BLKmode. */
4304 ix86_member_type_forces_blk (const_tree field
, machine_mode mode
)
4306 /* Union with XFmode must be in BLKmode. */
4307 return (mode
== XFmode
4308 && (TREE_CODE (DECL_FIELD_CONTEXT (field
)) == UNION_TYPE
4309 || TREE_CODE (DECL_FIELD_CONTEXT (field
)) == QUAL_UNION_TYPE
));
rtx
ix86_libcall_value (machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
/* Return true iff type is returned in memory.  */

static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  const machine_mode mode = type_natural_mode (type, NULL, true);
  HOST_WIDE_INT size;

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	{
	  size = int_size_in_bytes (type);

	  /* __m128 is returned in xmm0.  */
	  if ((!type || VECTOR_INTEGER_TYPE_P (type)
	       || INTEGRAL_TYPE_P (type)
	       || VECTOR_FLOAT_TYPE_P (type))
	      && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode)
	      && (GET_MODE_SIZE (mode) == 16 || size == 16))
	    return false;

	  /* Otherwise, the size must be exactly in [1248].  */
	  return size != 1 && size != 2 && size != 4 && size != 8;
	}
      else
	{
	  int needed_intregs, needed_sseregs;

	  return examine_argument (mode, type, 1,
				   &needed_intregs, &needed_sseregs);
	}
    }
  else
    {
      size = int_size_in_bytes (type);

      /* Intel MCU psABI returns scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (TARGET_IAMCU)
	return VECTOR_MODE_P (mode) || size < 0 || size > 8;

      if (mode == BLKmode)
	return true;

      if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
	return false;

      if (VECTOR_MODE_P (mode) || mode == TImode)
	{
	  /* User-created vectors small enough to fit in EAX.  */
	  if (size < 8)
	    return false;

	  /* Unless ABI prescribes otherwise,
	     MMX/3dNow values are returned in MM0 if available.  */
	  if (size == 8)
	    return TARGET_VECT8_RETURNS || !TARGET_MMX;

	  /* SSE values are returned in XMM0 if available.  */
	  if (size == 16)
	    return !TARGET_SSE;

	  /* AVX values are returned in YMM0 if available.  */
	  if (size == 32)
	    return !TARGET_AVX;

	  /* AVX512F values are returned in ZMM0 if available.  */
	  if (size == 64)
	    return !TARGET_AVX512F || !TARGET_EVEX512;
	}

      if (mode == XFmode)
	return false;

      if (size > 12)
	return true;

      /* OImode shouldn't be used directly.  */
      gcc_assert (mode != OImode);

      return false;
    }
}
/* Implement TARGET_PUSH_ARGUMENT.  */

static bool
ix86_push_argument (unsigned int npush)
{
  /* If SSE2 is available, use vector move to put large argument onto
     stack.  NB:  In 32-bit mode, use 8-byte vector move.  */
  return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8))
	  && TARGET_PUSH_ARGS
	  && !ACCUMULATE_OUTGOING_ARGS);
}
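/* Editorial note (not from the GCC sources): with -m64 and SSE2 enabled,
   ix86_push_argument (16) returns false, so a 16-byte argument is stored
   through a vector move into pre-allocated stack space rather than
   pushed; smaller arguments still use push when the target allows
   push-based argument passing.  */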
/* Create the va_list data type.  */

static tree
ix86_build_builtin_va_list_64 (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
					NULL_TREE, TYPE_ATTRIBUTES (record));

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
/* Setup the builtin va_list data type and for 64-bit the additional
   calling convention specific va_list data types.  */

static tree
ix86_build_builtin_va_list (void)
{
  if (TARGET_64BIT)
    {
      /* Initialize ABI specific va_list builtin types.

	 In lto1, we can encounter two va_list types:
	 - one as a result of the type-merge across TUs, and
	 - the one constructed here.
	 These two types will not have the same TYPE_MAIN_VARIANT, and
	 therefore a type identity check in canonical_va_list_type based on
	 TYPE_MAIN_VARIANT (which we used to have) will not work.
	 Instead, we tag each va_list_type_node with its unique attribute, and
	 look for the attribute in the type identity check in
	 canonical_va_list_type.

	 Tagging sysv_va_list_type_node directly with the attribute is
	 problematic since it's an array of one record, which will degrade
	 into a pointer to record when used as parameter (see build_va_arg
	 comments for an example), dropping the attribute in the process.
	 So we tag the record instead.  */

      /* For SYSV_ABI we use an array of one record.  */
      sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();

      /* For MS_ABI we use plain pointer to argument area.  */
      tree char_ptr_type = build_pointer_type (char_type_node);
      tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
			     TYPE_ATTRIBUTES (char_ptr_type));
      ms_va_list_type_node = build_type_attribute_variant (char_ptr_type,
							   attr);

      return ((ix86_abi == MS_ABI)
	      ? ms_va_list_type_node
	      : sysv_va_list_type_node);
    }
  else
    {
      /* For i386 we use plain pointer to argument area.  */
      return build_pointer_type (char_type_node);
    }
}
4512 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4515 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
4521 /* GPR size of varargs save area. */
4522 if (cfun
->va_list_gpr_size
)
4523 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
4525 ix86_varargs_gpr_size
= 0;
4527 /* FPR size of varargs save area. We don't need it if we don't pass
4528 anything in SSE registers. */
4529 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
4530 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
4532 ix86_varargs_fpr_size
= 0;
4534 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
4537 save_area
= frame_pointer_rtx
;
4538 set
= get_varargs_alias_set ();
4540 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4541 if (max
> X86_64_REGPARM_MAX
)
4542 max
= X86_64_REGPARM_MAX
;
4544 for (i
= cum
->regno
; i
< max
; i
++)
4546 mem
= gen_rtx_MEM (word_mode
,
4547 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
4548 MEM_NOTRAP_P (mem
) = 1;
4549 set_mem_alias_set (mem
, set
);
4550 emit_move_insn (mem
,
4551 gen_rtx_REG (word_mode
,
4552 x86_64_int_parameter_registers
[i
]));
4555 if (ix86_varargs_fpr_size
)
4558 rtx_code_label
*label
;
4561 /* Now emit code to save SSE registers. The AX parameter contains number
4562 of SSE parameter registers used to call this function, though all we
4563 actually check here is the zero/non-zero status. */
4565 label
= gen_label_rtx ();
4566 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
4567 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
4570 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4571 we used movdqa (i.e. TImode) instead? Perhaps even better would
4572 be if we could determine the real mode of the data, via a hook
4573 into pass_stdarg. Ignore all that for now. */
4575 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
4576 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
        max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
        {
          mem = plus_constant (Pmode, save_area,
                               i * 16 + ix86_varargs_gpr_size);
          mem = gen_rtx_MEM (smode, mem);
          MEM_NOTRAP_P (mem) = 1;
          set_mem_alias_set (mem, set);
          set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

          emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
        }

      emit_label (label);
    }
}
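
/* A sketch of the varargs register save area this function lays out
   (offsets relative to SAVE_AREA, per the SysV x86-64 ABI, assuming the
   full complement of registers is saved):

       0 ..  47   %rdi, %rsi, %rdx, %rcx, %r8, %r9    (8 bytes each)
      48 .. 175   %xmm0 .. %xmm7                      (16 bytes each)

   The gp_offset/fp_offset fields of the va_list later index into this
   block; ix86_varargs_gpr_size is the 48-byte GPR portion above.  */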
static void
setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
{
  alias_set_type set = get_varargs_alias_set ();
  int i;

  /* Reset to zero, as there might be a sysv va_arg used
     before.  */
  ix86_varargs_gpr_size = 0;
  ix86_varargs_fpr_size = 0;

  for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
    {
      rtx reg, mem;

      mem = gen_rtx_MEM (Pmode,
                         plus_constant (Pmode, virtual_incoming_args_rtx,
                                        i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);

      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
      emit_move_insn (mem, reg);
    }
}
static void
ix86_setup_incoming_varargs (cumulative_args_t cum_v,
                             const function_arg_info &arg,
                             int *, int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS next_cum;
  tree fntype;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  if (!TARGET_64BIT)
    return;

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
      && stdarg_p (fntype))
    ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);

  if (cum->call_abi == MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
  else
    setup_incoming_varargs_64 (&next_cum);
}
/* Checks if TYPE is of kind va_list char *.  */

static bool
is_va_list_char_pointer (tree type)
{
  tree canonic;

  /* For 32-bit it is always true.  */
  if (!TARGET_64BIT)
    return true;

  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
          || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
/* Implement va_start.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
         arguments using internal_arg_pointer, because they may be on
         the old stack.  The split stack prologue will arrange to
         leave a pointer to the old stack arguments in a scratch
         register, which we here copy to a pseudo-register.  The split
         stack prologue can't set the pseudo-register directly because
         it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
        {
          rtx reg;
          rtx_insn *seq;

          reg = gen_reg_rtx (Pmode);
          cfun->machine->split_stack_varargs_pointer = reg;

          start_sequence ();
          emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
          seq = get_insns ();
          end_sequence ();

          push_topmost_sequence ();
          emit_insn_after (seq, entry_of_function ());
          pop_topmost_sequence ();
        }
    }
  /* Only the 64-bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
        std_expand_builtin_va_start (valist, nextarg);
      else
        {
          rtx va_r, next;

          va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
          next = expand_binop (ptr_mode, add_optab,
                               cfun->machine->split_stack_varargs_pointer,
                               crtl->args.arg_offset_rtx,
                               NULL_RTX, 0, OPTAB_LIB_WIDEN);
          convert_move (va_r, next, 0);
        }
      return;
    }
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
                f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
                f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
                f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
                f_sav, NULL_TREE);
  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
                  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
                  build_int_cst (type, n_fpr * 16 + 8 * X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);

  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
         The function prologue saves it right above the stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
        t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);

      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
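
/* The net effect of the expansion above, written as the assignments a
   libc va_start would perform (a sketch in ABI terms, not literal emitted
   code; ap is the va_list, n_gpr/n_fpr/words as computed above):

     ap->gp_offset = n_gpr * 8;
     ap->fp_offset = 8 * X86_64_REGPARM_MAX + n_fpr * 16;   // 48 + n_fpr*16
     ap->overflow_arg_area = incoming_args + words * UNITS_PER_WORD;
     ap->reg_save_area = <save area set up by the prologue>;  */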
/* Implement va_arg.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
                      gimple_seq *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  machine_mode nat_mode;
  unsigned int arg_boundary;
  unsigned int type_align;

  /* Only the 64-bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
                valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
  indirect_p = pass_va_arg_by_reference (type);
  if (indirect_p)
    type = build_pointer_type (type);
  size = arg_int_size_in_bytes (type);
  rsize = CEIL (size, UNITS_PER_WORD);

  nat_mode = type_natural_mode (type, NULL, false);

  /* Unnamed 256 and 512bit vector mode parameters are passed on stack:
     for such modes, when !TARGET_64BIT_MS_ABI, no register container is
     built.  Otherwise the container describes the registers used.  */
  container = construct_container (nat_mode, TYPE_MODE (type),
                                   type, 0, X86_64_REGPARM_MAX,
                                   X86_64_SSE_REGPARM_MAX, intreg,
                                   0);
  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");
  type_align = TYPE_ALIGN (type);

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
      need_temp = (!REG_P (container)
                   && ((needed_intregs && TYPE_ALIGN (type) > 64)
                       || TYPE_ALIGN (type) > 128));

      /* If we are passing a structure, verify that it occupies a
         consecutive block of the register save area.  If not, we need
         to do moves.  */
      if (!need_temp && !REG_P (container))
        {
          /* Verify that all registers are strictly consecutive.  */
          if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
            {
              int i;

              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 16)
                    need_temp = true;
                }
            }
          else
            {
              int i;

              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 8)
                    need_temp = true;
                }
            }
        }
      int_addr = create_tmp_var (ptr_type_node, "int_addr");
      sse_addr = create_tmp_var (ptr_type_node, "sse_addr");

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
        {
          t = build_int_cst (TREE_TYPE (gpr),
                             (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
          t = build2 (GE_EXPR, boolean_type_node, gpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          t = build_int_cst (TREE_TYPE (fpr),
                             (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
                             + X86_64_REGPARM_MAX * 8);
          t = build2 (GE_EXPR, boolean_type_node, fpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }
      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
        {
          /* int_addr = gpr + sav; */
          t = fold_build_pointer_plus (sav, gpr);
          gimplify_assign (int_addr, t, pre_p);
        }
      if (needed_sseregs)
        {
          /* sse_addr = fpr + sav; */
          t = fold_build_pointer_plus (sav, fpr);
          gimplify_assign (sse_addr, t, pre_p);
        }
      if (need_temp)
        {
          int i, prev_size = 0;
          tree temp = create_tmp_var (type, "va_arg_tmp");
          TREE_ADDRESSABLE (temp) = 1;

          /* addr = &temp; */
          t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
          gimplify_assign (addr, t, pre_p);
          for (i = 0; i < XVECLEN (container, 0); i++)
            {
              rtx slot = XVECEXP (container, 0, i);
              rtx reg = XEXP (slot, 0);
              machine_mode mode = GET_MODE (reg);
              tree piece_type;
              tree addr_type;
              tree daddr_type;
              tree src_addr, src;
              int src_offset;
              tree dest_addr, dest;
              int cur_size = GET_MODE_SIZE (mode);

              gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
              prev_size = INTVAL (XEXP (slot, 1));
              if (prev_size + cur_size > size)
                {
                  cur_size = size - prev_size;
                  unsigned int nbits = cur_size * BITS_PER_UNIT;
                  if (!int_mode_for_size (nbits, 1).exists (&mode))
                    mode = QImode;
                }

              piece_type = lang_hooks.types.type_for_mode (mode, 1);
              if (mode == GET_MODE (reg))
                addr_type = build_pointer_type (piece_type);
              else
                addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
                                                         true);
              daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
                                                        true);
              if (SSE_REGNO_P (REGNO (reg)))
                {
                  src_addr = sse_addr;
                  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
                }
              else
                {
                  src_addr = int_addr;
                  src_offset = REGNO (reg) * 8;
                }
              src_addr = fold_convert (addr_type, src_addr);
              src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

              dest_addr = fold_convert (daddr_type, addr);
              dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
              if (cur_size == GET_MODE_SIZE (mode))
                {
                  src = build_va_arg_indirect_ref (src_addr);
                  dest = build_va_arg_indirect_ref (dest_addr);

                  gimplify_assign (dest, src, pre_p);
                }
              else
                {
                  tree copy
                    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
                                       3, dest_addr, src_addr,
                                       size_int (cur_size));
                  gimplify_and_add (copy, pre_p);
                }
              prev_size += cur_size;
            }
        }
      if (needed_intregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
                      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
          gimplify_assign (gpr, t, pre_p);
          /* The GPR save area guarantees only 8-byte alignment.  */
          if (!need_temp)
            type_align = MIN (type_align, 64);
        }

      if (needed_sseregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
                      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
          gimplify_assign (unshare_expr (fpr), t, pre_p);
        }

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }
  /* ... otherwise out of the overflow area.  */

  /* When we align a parameter on the stack for the caller, if its
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We match the callee
     here with the caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
                  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
  type = build_aligned_type (type, type_align);
  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
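
/* The gimplification above implements the classic SysV x86-64 va_arg
   algorithm.  As ABI-style pseudo-C (a sketch; GCC emits the equivalent
   GIMPLE, not this source):

     if (gp_offset <= 48 - needed_gp * 8
         && fp_offset <= 48 + 128 - needed_fp * 16)    // fits in registers
       {
         addr = reg_save_area + (integer ? gp_offset : fp_offset);
         gp_offset += needed_gp * 8;
         fp_offset += needed_fp * 16;
       }
     else                                              // from the stack
       {
         addr = align (overflow_arg_area, max (8, alignment));
         overflow_arg_area = addr + roundup (sizeof (T), 8);
       }
     result = *(T *) addr;                                              */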
/* Return true if OPNUM's MEM should be matched
   in movabs* patterns.  */

bool
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (SUBREG_P (mem))
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return volatile_ok || !MEM_VOLATILE_P (mem);
}
/* Return false if INSN contains a MEM with a non-default address space.  */

bool
ix86_check_no_addr_space (rtx insn)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
    {
      rtx x = *iter;
      if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
        return false;
    }
  return true;
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char *cst[5] =
  {
    "0.3010299956639811952256464283594894482", /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009", /* 1: fldln2  */
    "1.4426950408889634073876517827983434472", /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090", /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044", /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
                    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
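
/* For reference, the five strings above are the mathematical constants
   loaded by the corresponding x87 constant-load instructions:

     fldlg2 = log10(2)     fldln2 = ln(2)      fldl2e = log2(e)
     fldl2t = log2(10)     fldpi  = pi

   e.g. log2(e) = 1/ln(2) ~ 1.4426950408889634..., matching entry 2.  */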
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  machine_mode mode = GET_MODE (x);

  const REAL_VALUE_TYPE *r;

  if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  r = CONST_DOUBLE_REAL_VALUE (x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
      && !flag_rounding_math)
    {
      int i;

      if (! ext_80387_constants_init)
        init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
        if (real_identical (r, &ext_80387_constants_table[i]))
          return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (r))
    return 8;
  if (real_identical (r, &dconstm1))
    return 9;

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
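    /* The case bodies follow directly from standard_80387_constant_p's
       return codes documented above (a reconstruction of the mapping,
       hedged on those codes):

         1 -> "fldz"    2 -> "fld1"    3 -> "fldlg2"  4 -> "fldln2"
         5 -> "fldl2e"  6 -> "fldl2t"  7 -> "fldpi"
         8, 9 -> "#" (split later into fldz;fchs / fld1;fchs)          */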
/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  /* IDX values 3..7 map to table entries 0..4, i.e. i = idx - 3.  */

  return const_double_from_real_value (ext_80387_constants_table[i],
                                       XFmode);
}
/* Return 1 if X is all bits 0, 2 if X is all bits 1
   and 3 if X is all bits 1 with zero extend
   in supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x, machine_mode pred_mode)
{
  machine_mode mode;

  if (!TARGET_SSE)
    return 0;

  mode = GET_MODE (x);

  if (x == const0_rtx || const0_operand (x, mode))
    return 1;

  if (x == constm1_rtx
      || vector_all_ones_operand (x, mode)
      || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
           || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
          && float_vector_all_ones_operand (x, mode)))
    {
      /* VOIDmode integer constant, get mode from the predicate.  */
      if (mode == VOIDmode)
        mode = pred_mode;

      switch (GET_MODE_SIZE (mode))
        {
        case 64:
          if (TARGET_AVX512F && TARGET_EVEX512)
            return 2;
          break;
        case 32:
          if (TARGET_AVX2)
            return 2;
          break;
        case 16:
          if (TARGET_SSE2)
            return 2;
          break;
        default:
          break;
        }
    }

  if (vector_all_ones_zero_extend_half_operand (x, mode)
      || vector_all_ones_zero_extend_quarter_operand (x, mode))
    return 3;

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant operands[1] into operands[0].  */

const char *
standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
  machine_mode mode;
  rtx x = operands[1];

  gcc_assert (TARGET_SSE);

  mode = GET_MODE (x);

  if (x == const0_rtx || const0_operand (x, mode))
    {
      switch (get_attr_mode (insn))
        {
        case MODE_TI:
          if (!EXT_REX_SSE_REG_P (operands[0]))
            return "%vpxor\t%0, %d0";
          /* FALLTHRU */
        case MODE_XI:
        case MODE_OI:
          if (EXT_REX_SSE_REG_P (operands[0]))
            {
              if (TARGET_AVX512VL)
                return "vpxord\t%x0, %x0, %x0";
              else if (TARGET_EVEX512)
                return "vpxord\t%g0, %g0, %g0";
              else
                gcc_unreachable ();
            }
          return "vpxor\t%x0, %x0, %x0";

        case MODE_V2DF:
          if (!EXT_REX_SSE_REG_P (operands[0]))
            return "%vxorpd\t%0, %d0";
          /* FALLTHRU */
        case MODE_V8DF:
        case MODE_V4DF:
          if (EXT_REX_SSE_REG_P (operands[0]))
            {
              if (TARGET_AVX512DQ)
                {
                  if (TARGET_AVX512VL)
                    return "vxorpd\t%x0, %x0, %x0";
                  else if (TARGET_EVEX512)
                    return "vxorpd\t%g0, %g0, %g0";
                  else
                    gcc_unreachable ();
                }
              else
                {
                  if (TARGET_AVX512VL)
                    return "vpxorq\t%x0, %x0, %x0";
                  else if (TARGET_EVEX512)
                    return "vpxorq\t%g0, %g0, %g0";
                  else
                    gcc_unreachable ();
                }
            }
          return "vxorpd\t%x0, %x0, %x0";

        case MODE_V4SF:
          if (!EXT_REX_SSE_REG_P (operands[0]))
            return "%vxorps\t%0, %d0";
          /* FALLTHRU */
        case MODE_V16SF:
        case MODE_V8SF:
          if (EXT_REX_SSE_REG_P (operands[0]))
            {
              if (TARGET_AVX512DQ)
                {
                  if (TARGET_AVX512VL)
                    return "vxorps\t%x0, %x0, %x0";
                  else if (TARGET_EVEX512)
                    return "vxorps\t%g0, %g0, %g0";
                  else
                    gcc_unreachable ();
                }
              else
                {
                  if (TARGET_AVX512VL)
                    return "vpxord\t%x0, %x0, %x0";
                  else if (TARGET_EVEX512)
                    return "vpxord\t%g0, %g0, %g0";
                  else
                    gcc_unreachable ();
                }
            }
          return "vxorps\t%x0, %x0, %x0";

        default:
          gcc_unreachable ();
        }
    }
  else if (x == constm1_rtx
           || vector_all_ones_operand (x, mode)
           || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
               && float_vector_all_ones_operand (x, mode)))
    {
      enum attr_mode insn_mode = get_attr_mode (insn);

      switch (insn_mode)
        {
        case MODE_XI:
        case MODE_V8DF:
        case MODE_V16SF:
          gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
          return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";

        case MODE_OI:
        case MODE_V4DF:
        case MODE_V8SF:
          gcc_assert (TARGET_AVX2);
          /* FALLTHRU */
        case MODE_TI:
        case MODE_V2DF:
        case MODE_V4SF:
          gcc_assert (TARGET_SSE2);
          if (EXT_REX_SSE_REG_P (operands[0]))
            {
              if (TARGET_AVX512VL)
                return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
              else if (TARGET_EVEX512)
                return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
              else
                gcc_unreachable ();
            }
          return (TARGET_AVX
                  ? "vpcmpeqd\t%0, %0, %0"
                  : "pcmpeqd\t%0, %0");

        default:
          gcc_unreachable ();
        }
    }
  else if (vector_all_ones_zero_extend_half_operand (x, mode))
    {
      if (GET_MODE_SIZE (mode) == 64)
        {
          gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
          return "vpcmpeqd\t%t0, %t0, %t0";
        }
      else if (GET_MODE_SIZE (mode) == 32)
        {
          gcc_assert (TARGET_AVX);
          return "vpcmpeqd\t%x0, %x0, %x0";
        }
      gcc_unreachable ();
    }
  else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
    {
      gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
      return "vpcmpeqd\t%x0, %x0, %x0";
    }

  gcc_unreachable ();
}
/* Returns true if INSN can be transformed from a memory load
   to a supported FP constant load.  */

bool
ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
{
  rtx src = find_constant_src (insn);

  gcc_assert (REG_P (dst));

  if (src == NULL
      || (SSE_REGNO_P (REGNO (dst))
          && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
      || (!TARGET_AVX512VL
          && EXT_REX_SSE_REGNO_P (REGNO (dst))
          && standard_sse_constant_p (src, GET_MODE (dst)) == 1)
      || (STACK_REGNO_P (REGNO (dst))
          && standard_80387_constant_p (src) < 1))
    return false;

  return true;
}
/* Predicate for pre-reload splitters with associated instructions,
   which can match any time before the split1 pass (usually combine),
   then are unconditionally split in that pass and should not be
   matched again afterwards.  */

bool
ix86_pre_reload_split (void)
{
  return (can_create_pseudo_p ()
          && !(cfun->curr_properties & PROP_rtl_split_insns));
}
/* Return the opcode of the TYPE_SSEMOV instruction.  To move from
   or to xmm16-xmm31/ymm16-ymm31 registers, we either require
   TARGET_AVX512VL or it is a register to register move which can
   be done with zmm register move.  */

static const char *
ix86_get_ssemov (rtx *operands, unsigned size,
                 enum attr_mode insn_mode, machine_mode mode)
{
  char buf[128];
  bool misaligned_p = (misaligned_operand (operands[0], mode)
                       || misaligned_operand (operands[1], mode));
  bool evex_reg_p = (size == 64
                     || EXT_REX_SSE_REG_P (operands[0])
                     || EXT_REX_SSE_REG_P (operands[1]));

  bool egpr_p = (TARGET_APX_EGPR
                 && (x86_extended_rex2reg_mentioned_p (operands[0])
                     || x86_extended_rex2reg_mentioned_p (operands[1])));
  bool egpr_vl = egpr_p && TARGET_AVX512VL;

  machine_mode scalar_mode;

  const char *opcode = NULL;
  enum
    {
      opcode_int,
      opcode_float,
      opcode_double
    } type = opcode_int;

  switch (insn_mode)
    {
    case MODE_V16SF:
    case MODE_V8SF:
    case MODE_V4SF:
      scalar_mode = E_SFmode;
      type = opcode_float;
      break;
    case MODE_V8DF:
    case MODE_V4DF:
    case MODE_V2DF:
      scalar_mode = E_DFmode;
      type = opcode_double;
      break;
    case MODE_XI:
    case MODE_OI:
    case MODE_TI:
      scalar_mode = GET_MODE_INNER (mode);
      break;
    default:
      gcc_unreachable ();
    }

  /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
     we can only use zmm register move without memory operand.  */
  if (evex_reg_p
      && !TARGET_AVX512VL
      && GET_MODE_SIZE (mode) < 64)
    {
      /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
         xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
         AVX512VL is disabled, LRA can still generate reg to
         reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
         modes.  */
      if (memory_operand (operands[0], mode)
          || memory_operand (operands[1], mode))
        gcc_unreachable ();
      size = 64;
      /* We need TARGET_EVEX512 to move into zmm register.  */
      gcc_assert (TARGET_EVEX512);
      switch (type)
        {
        case opcode_int:
          if (scalar_mode == E_HFmode || scalar_mode == E_BFmode)
            opcode = (misaligned_p
                      ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
                      : "vmovdqa64");
          else
            opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
          break;
        case opcode_float:
          opcode = misaligned_p ? "vmovups" : "vmovaps";
          break;
        case opcode_double:
          opcode = misaligned_p ? "vmovupd" : "vmovapd";
          break;
        }
    }
  else if (SCALAR_FLOAT_MODE_P (scalar_mode))
    {
      switch (scalar_mode)
        {
        case E_HFmode:
        case E_BFmode:
          if (evex_reg_p || egpr_vl)
            opcode = (misaligned_p
                      ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
                      : "vmovdqa64");
          else if (egpr_p)
            opcode = (misaligned_p
                      ? (TARGET_AVX512BW ? "vmovdqu16" : "%vmovups")
                      : "%vmovaps");
          else
            opcode = (misaligned_p
                      ? (TARGET_AVX512BW ? "vmovdqu16" : "%vmovdqu")
                      : "%vmovdqa");
          break;
        case E_SFmode:
          opcode = misaligned_p ? "%vmovups" : "%vmovaps";
          break;
        case E_DFmode:
          opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
          break;
        case E_TFmode:
          if (evex_reg_p || egpr_vl)
            opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
          else if (egpr_p)
            opcode = misaligned_p ? "%vmovups" : "%vmovaps";
          else
            opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
          break;
        default:
          gcc_unreachable ();
        }
    }
  else if (SCALAR_INT_MODE_P (scalar_mode))
    {
      switch (scalar_mode)
        {
        case E_QImode:
          if (evex_reg_p || egpr_vl)
            opcode = (misaligned_p
                      ? (TARGET_AVX512BW ? "vmovdqu8" : "vmovdqu64")
                      : "vmovdqa64");
          else if (egpr_p)
            opcode = (misaligned_p
                      ? (TARGET_AVX512BW ? "vmovdqu8" : "%vmovups")
                      : "%vmovaps");
          else
            opcode = (misaligned_p
                      ? (TARGET_AVX512BW ? "vmovdqu8" : "%vmovdqu")
                      : "%vmovdqa");
          break;
        case E_HImode:
          if (evex_reg_p || egpr_vl)
            opcode = (misaligned_p
                      ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
                      : "vmovdqa64");
          else if (egpr_p)
            opcode = (misaligned_p
                      ? (TARGET_AVX512BW ? "vmovdqu16" : "%vmovups")
                      : "%vmovaps");
          else
            opcode = (misaligned_p
                      ? (TARGET_AVX512BW ? "vmovdqu16" : "%vmovdqu")
                      : "%vmovdqa");
          break;
        case E_SImode:
          if (evex_reg_p || egpr_vl)
            opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
          else if (egpr_p)
            opcode = misaligned_p ? "%vmovups" : "%vmovaps";
          else
            opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
          break;
        case E_DImode:
        case E_TImode:
        case E_OImode:
          if (evex_reg_p || egpr_vl)
            opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
          else if (egpr_p)
            opcode = misaligned_p ? "%vmovups" : "%vmovaps";
          else
            opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
          break;
        case E_XImode:
          opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
          break;
        default:
          gcc_unreachable ();
        }
    }
  else
    gcc_unreachable ();

  switch (size)
    {
    case 64:
      snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
                opcode);
      break;
    case 32:
      snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
                opcode);
      break;
    case 16:
      snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
                opcode);
      break;
    default:
      gcc_unreachable ();
    }
  output_asm_insn (buf, operands);
  return "";
}
/* Return the template of the TYPE_SSEMOV instruction to move
   operands[1] into operands[0].  */

const char *
ix86_output_ssemov (rtx_insn *insn, rtx *operands)
{
  machine_mode mode = GET_MODE (operands[0]);
  if (get_attr_type (insn) != TYPE_SSEMOV
      || mode != GET_MODE (operands[1]))
    gcc_unreachable ();

  enum attr_mode insn_mode = get_attr_mode (insn);

  switch (insn_mode)
    {
    case MODE_XI:
    case MODE_V8DF:
    case MODE_V16SF:
      return ix86_get_ssemov (operands, 64, insn_mode, mode);

    case MODE_OI:
    case MODE_V4DF:
    case MODE_V8SF:
      return ix86_get_ssemov (operands, 32, insn_mode, mode);

    case MODE_TI:
    case MODE_V2DF:
    case MODE_V4SF:
      return ix86_get_ssemov (operands, 16, insn_mode, mode);

    case MODE_DI:
      /* Handle broken assemblers that require movd instead of movq.  */
      if (GENERAL_REG_P (operands[0]))
        {
          if (HAVE_AS_IX86_INTERUNIT_MOVQ)
            return "%vmovq\t{%1, %q0|%q0, %1}";
          else
            return "%vmovd\t{%1, %q0|%q0, %1}";
        }
      else if (GENERAL_REG_P (operands[1]))
        {
          if (HAVE_AS_IX86_INTERUNIT_MOVQ)
            return "%vmovq\t{%q1, %0|%0, %q1}";
          else
            return "%vmovd\t{%q1, %0|%0, %q1}";
        }
      else
        return "%vmovq\t{%1, %0|%0, %1}";

    case MODE_SI:
      if (GENERAL_REG_P (operands[0]))
        return "%vmovd\t{%1, %k0|%k0, %1}";
      else if (GENERAL_REG_P (operands[1]))
        return "%vmovd\t{%k1, %0|%0, %k1}";
      else
        return "%vmovd\t{%1, %0|%0, %1}";

    case MODE_HI:
      if (GENERAL_REG_P (operands[0]))
        return "vmovw\t{%1, %k0|%k0, %1}";
      else if (GENERAL_REG_P (operands[1]))
        return "vmovw\t{%k1, %0|%0, %k1}";
      else
        return "vmovw\t{%1, %0|%0, %1}";

    case MODE_DF:
      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
        return "vmovsd\t{%d1, %0|%0, %d1}";
      else
        return "%vmovsd\t{%1, %0|%0, %1}";

    case MODE_SF:
      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
        return "vmovss\t{%d1, %0|%0, %d1}";
      else
        return "%vmovss\t{%1, %0|%0, %1}";

    case MODE_HF:
    case MODE_BF:
      if (REG_P (operands[0]) && REG_P (operands[1]))
        return "vmovsh\t{%d1, %0|%0, %d1}";
      else
        return "vmovsh\t{%1, %0|%0, %1}";

    case MODE_V1DF:
      gcc_assert (!TARGET_AVX);
      return "movlpd\t{%1, %0|%0, %1}";

    case MODE_V2SF:
      if (TARGET_AVX && REG_P (operands[0]))
        return "vmovlps\t{%1, %d0|%d0, %1}";
      else
        return "%vmovlps\t{%1, %0|%0, %1}";

    default:
      gcc_unreachable ();
    }
}
/* Returns true if OP contains a symbol reference.  */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (op, i) - 1; j >= 0; j--)
            if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
              return true;
        }

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
        return true;
    }

  return false;
}
/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return false then.  Return false if there is no frame
   marker to de-allocate.  */

bool
ix86_can_use_return_insn_p (void)
{
  if (ix86_function_ms_hook_prologue (current_function_decl))
    return false;

  if (ix86_function_naked (current_function_decl))
    return false;

  /* Don't use `ret' instruction in interrupt handler.  */
  if (! reload_completed
      || frame_pointer_needed
      || cfun->machine->func_type != TYPE_NORMAL)
    return false;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (crtl->args.pops_args && crtl->args.size >= 32768)
    return false;

  struct ix86_frame &frame = cfun->machine->frame;
  return (frame.stack_pointer_offset == UNITS_PER_WORD
          && (frame.nregs + frame.nsseregs) == 0);
}
/* Return stack frame size.  get_frame_size () returns used stack slots
   during compilation, which may be optimized out later.  If stack frame
   is needed, stack_frame_required should be true.  */

static HOST_WIDE_INT
ix86_get_frame_size (void)
{
  if (cfun->machine->stack_frame_required)
    return get_frame_size ();
  else
    return 0;
}
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires a valid frame pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* Win64 SEH: very large frames need a frame pointer, as the maximum
     stack allocation is 4GB.  */
  if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
    return true;

  /* SSE saves require a frame pointer when the stack is misaligned.  */
  if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!crtl->is_leaf
          || ix86_current_function_calls_tls_descriptor))
    return true;

  /* Several versions of mcount for the x86 assume that there is a
     frame, so we cannot allow profiling without a frame pointer.  */
  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}
/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif
/* Label count for call and return thunks.  It is used to make unique
   labels in call and return thunks.  */
static int indirectlabelno;

/* True if call thunk function is needed.  */
static bool indirect_thunk_needed = false;

/* Bit masks of integer registers, which contain branch target, used
   by call thunk functions.  */
static HARD_REG_SET indirect_thunks_used;

/* True if return thunk function is needed.  */
static bool indirect_return_needed = false;

/* True if return thunk function via CX is needed.  */
static bool indirect_return_via_cx;

#ifndef INDIRECT_LABEL
# define INDIRECT_LABEL "LIND"
#endif
6007 enum indirect_thunk_prefix
6009 indirect_thunk_prefix_none
,
6010 indirect_thunk_prefix_nt
6013 /* Return the prefix needed for an indirect branch INSN. */
6015 enum indirect_thunk_prefix
6016 indirect_thunk_need_prefix (rtx_insn
*insn
)
6018 enum indirect_thunk_prefix need_prefix
;
6019 if ((cfun
->machine
->indirect_branch_type
6020 == indirect_branch_thunk_extern
)
6021 && ix86_notrack_prefixed_insn_p (insn
))
6023 /* NOTRACK prefix is only used with external thunk so that it
6024 can be properly updated to support CET at run-time. */
6025 need_prefix
= indirect_thunk_prefix_nt
;
6028 need_prefix
= indirect_thunk_prefix_none
;
6032 /* Fills in the label name that should be used for the indirect thunk. */
6035 indirect_thunk_name (char name
[32], unsigned int regno
,
6036 enum indirect_thunk_prefix need_prefix
,
6039 if (regno
!= INVALID_REGNUM
&& regno
!= CX_REG
&& ret_p
)
6042 if (USE_HIDDEN_LINKONCE
)
6046 if (need_prefix
== indirect_thunk_prefix_nt
6047 && regno
!= INVALID_REGNUM
)
6049 /* NOTRACK prefix is only used with external thunk via
6050 register so that NOTRACK prefix can be added to indirect
6051 branch via register to support CET at run-time. */
6057 const char *ret
= ret_p
? "return" : "indirect";
6059 if (regno
!= INVALID_REGNUM
)
6061 const char *reg_prefix
;
6062 if (LEGACY_INT_REGNO_P (regno
))
6063 reg_prefix
= TARGET_64BIT
? "r" : "e";
6066 sprintf (name
, "__x86_%s_thunk%s_%s%s",
6067 ret
, prefix
, reg_prefix
, reg_names
[regno
]);
6070 sprintf (name
, "__x86_%s_thunk%s", ret
, prefix
);
6074 if (regno
!= INVALID_REGNUM
)
6075 ASM_GENERATE_INTERNAL_LABEL (name
, "LITR", regno
);
6079 ASM_GENERATE_INTERNAL_LABEL (name
, "LRT", 0);
6081 ASM_GENERATE_INTERNAL_LABEL (name
, "LIT", 0);
/* Output a call and return thunk for indirect branch.  If REGNO != -1,
   the function address is in REGNO and the call and return thunk looks
   like:

        call    L2
   L1:
        pause
        lfence
        jmp     L1
   L2:
        mov     %REG, (%sp)
        ret

   Otherwise, the function address is on the top of stack and the
   call and return thunk looks like:

        call    L2
   L1:
        pause
        lfence
        jmp     L1
   L2:
        lea     WORD_SIZE(%sp), %sp
        ret
 */

static void
output_indirect_thunk (unsigned int regno)
{
  char indirectlabel1[32];
  char indirectlabel2[32];

  ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
                               indirectlabelno++);
  ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
                               indirectlabelno++);

  /* Call */
  fputs ("\tcall\t", asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel2);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

  /* AMD and Intel CPUs each prefer a different instruction as the loop
     filler; emitting both pause + lfence is a compromise.  */
  fprintf (asm_out_file, "\tpause\n\tlfence\n");

  /* Jump.  */
  fputs ("\tjmp\t", asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel1);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

  /* The above call insn pushed a word to stack.  Adjust CFI info.  */
  if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
    {
      if (! dwarf2out_do_cfi_asm ())
        {
          dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
          xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
          xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
          vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
        }
      dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
      xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
      xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
      vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
      dwarf2out_emit_cfi (xcfi);
    }

  if (regno != INVALID_REGNUM)
    {
      /* MOV.  */
      rtx xops[2];
      xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
      xops[1] = gen_rtx_REG (word_mode, regno);
      output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
    }
  else
    {
      /* LEA.  */
      rtx xops[2];
      xops[0] = stack_pointer_rtx;
      xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
    }

  fputs ("\tret\n", asm_out_file);
  if ((ix86_harden_sls & harden_sls_return))
    fputs ("\tint3\n", asm_out_file);
}
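
/* Concretely, for REGNO == AX_REG on x86-64 the emitted thunk is the
   standard retpoline sequence (illustrative assembler output; the labels
   are the LIND-based internal labels generated above):

   __x86_indirect_thunk_rax:
           call    .LIND1
   .LIND0:
           pause
           lfence
           jmp     .LIND0
   .LIND1:
           mov     %rax, (%rsp)
           ret

   The call pushes a return address which the mov overwrites with the
   real branch target, so the ret transfers control through the return
   stack buffer instead of the indirect branch predictor.  */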
/* Output a function with a call and return thunk for indirect branch.
   If REGNO != INVALID_REGNUM, the function address is in REGNO.
   Otherwise, the function address is on the top of stack.  The thunk is
   used for function return if RET_P is true.  */

static void
output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
                                unsigned int regno, bool ret_p)
{
  char name[32];
  tree decl;

  /* Create __x86_indirect_thunk.  */
  indirect_thunk_name (name, regno, need_prefix, ret_p);
  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                     get_identifier (name),
                     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
                                   NULL_TREE, void_type_node);
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;
  DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
  if (TARGET_MACHO)
    {
      switch_to_section (darwin_sections[picbase_thunk_section]);
      fputs ("\t.weak_definition\t", asm_out_file);
      assemble_name (asm_out_file, name);
      fputs ("\n\t.private_extern\t", asm_out_file);
      assemble_name (asm_out_file, name);
      putc ('\n', asm_out_file);
      ASM_OUTPUT_LABEL (asm_out_file, name);
      DECL_WEAK (decl) = 1;
    }
  else
#endif
    if (USE_HIDDEN_LINKONCE)
      {
        cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

        targetm.asm_out.unique_section (decl, 0);
        switch_to_section (get_named_section (decl, NULL, 0));

        targetm.asm_out.globalize_label (asm_out_file, name);
        fputs ("\t.hidden\t", asm_out_file);
        assemble_name (asm_out_file, name);
        putc ('\n', asm_out_file);
        ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
      }
    else
      {
        switch_to_section (text_section);
        ASM_OUTPUT_LABEL (asm_out_file, name);
      }

  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  /* We're about to hide the function body from callees of final_* by
     emitting it directly; tell them we're a thunk, if they care.  */
  cfun->is_thunk = true;
  first_function_block_is_cold = false;
  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), asm_out_file, 1);

  output_indirect_thunk (regno);

  final_end_function ();
  init_insn_lengths ();
  free_after_compilation (cfun);
  set_cfun (NULL);
  current_function_decl = NULL;
}
static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  unsigned int regno;

  if (indirect_return_needed)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
                                    INVALID_REGNUM, true);
  if (indirect_return_via_cx)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
                                    CX_REG, true);
  if (indirect_thunk_needed)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
                                    INVALID_REGNUM, false);

  for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
    {
      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
        output_indirect_thunk_function (indirect_thunk_prefix_none,
                                        regno, false);
    }

  for (regno = FIRST_REX2_INT_REG; regno <= LAST_REX2_INT_REG; regno++)
    {
      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
        output_indirect_thunk_function (indirect_thunk_prefix_none,
                                        regno, false);
    }

  for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
    {
      char name[32];
      tree decl;

      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
        output_indirect_thunk_function (indirect_thunk_prefix_none,
                                        regno, false);

      if (!(pic_labels_used & (1 << regno)))
        continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                         get_identifier (name),
                         build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
                                       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;
      DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
        {
          switch_to_section (darwin_sections[picbase_thunk_section]);
          fputs ("\t.weak_definition\t", asm_out_file);
          assemble_name (asm_out_file, name);
          fputs ("\n\t.private_extern\t", asm_out_file);
          assemble_name (asm_out_file, name);
          putc ('\n', asm_out_file);
          ASM_OUTPUT_LABEL (asm_out_file, name);
          DECL_WEAK (decl) = 1;
        }
      else
#endif
        if (USE_HIDDEN_LINKONCE)
          {
            cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

            targetm.asm_out.unique_section (decl, 0);
            switch_to_section (get_named_section (decl, NULL, 0));

            targetm.asm_out.globalize_label (asm_out_file, name);
            fputs ("\t.hidden\t", asm_out_file);
            assemble_name (asm_out_file, name);
            putc ('\n', asm_out_file);
            ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
          }
        else
          {
            switch_to_section (text_section);
            ASM_OUTPUT_LABEL (asm_out_file, name);
          }

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      allocate_struct_function (decl, false);
      init_function_start (decl);
      /* We're about to hide the function body from callees of final_* by
         emitting it directly; tell them we're a thunk, if they care.  */
      cfun->is_thunk = true;
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
         as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
        {
          int i = 8;

          while (i--)
            fputs ("\tnop\n", asm_out_file);
        }

      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
                             gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
         Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
         an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (flag_pic)
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("%!call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
         This is what will be referenced by the Mach-O PIC subsystem.  */
      if (machopic_should_output_picbase_label () || !label)
        ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);

      /* When we are restoring the pic base at the site of a nonlocal label,
         and we decided to emit the pic base above, we will still output a
         local label used for calculating the correction offset (even though
         the offset will be 0 in that case).  */
      if (label)
        targetm.asm_out.internal_label (asm_out_file, "L",
                                        CODE_LABEL_NUMBER (label));
#endif
    }
  else
    {
      if (TARGET_MACHO)
        /* We don't need a pic base, we're not producing pic.  */
        gcc_unreachable ();

      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
      targetm.asm_out.internal_label (asm_out_file, "L",
                                      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }

  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
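
/* The resulting 32-bit PIC prologue pairs the SET_GOT expansion with the
   pc thunk emitted by ix86_code_end; for %ebx it looks like this
   (illustrative assembler output):

           call    __x86.get_pc_thunk.bx
           addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   __x86.get_pc_thunk.bx:
           movl    (%esp), %ebx
           ret

   i.e. the thunk returns the caller's address, to which the add applies
   the GOT displacement.  */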
/* Generate a "push" pattern for input ARG.  */

rtx
gen_push (rtx arg, bool ppx_p)
{
  struct machine_function *m = cfun->machine;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  rtx stack = gen_rtx_MEM (word_mode,
                           gen_rtx_PRE_DEC (Pmode,
                                            stack_pointer_rtx));
  return ppx_p ? gen_pushp_di (stack, arg) : gen_rtx_SET (stack, arg);
}

rtx
gen_pushfl (void)
{
  struct machine_function *m = cfun->machine;
  rtx flags, mem;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  flags = gen_rtx_REG (CCmode, FLAGS_REG);

  mem = gen_rtx_MEM (word_mode,
                     gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));

  return gen_pushfl2 (word_mode, mem, flags);
}
/* Generate a "pop" pattern for input ARG.  */

rtx
gen_pop (rtx arg, bool ppx_p)
{
  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  rtx stack = gen_rtx_MEM (word_mode,
                           gen_rtx_POST_INC (Pmode,
                                             stack_pointer_rtx));

  return ppx_p ? gen_popp_di (arg, stack) : gen_rtx_SET (arg, stack);
}

rtx
gen_popfl (void)
{
  rtx flags, mem;

  flags = gen_rtx_REG (CCmode, FLAGS_REG);

  mem = gen_rtx_MEM (word_mode,
                     gen_rtx_POST_INC (Pmode, stack_pointer_rtx));

  return gen_popfl1 (word_mode, flags, mem);
}
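
/* For a word-mode register argument on x86-64, gen_push/gen_pop build the
   familiar RTL (shown for %rbx; a sketch of the returned SET patterns):

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI bx))    ; push
     (set (reg:DI bx) (mem:DI (post_inc:DI (reg:DI sp))))   ; pop

   The cfa_offset/sp_offset bookkeeping above keeps the frame-state model
   in sync with the stack adjustment these patterns imply.  */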
/* Generate a "push2" pattern for input ARG.  */
rtx
gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false)
{
  struct machine_function *m = cfun->machine;
  const int offset = UNITS_PER_WORD * 2;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += offset;
  m->fs.sp_offset += offset;

  if (REG_P (reg1) && GET_MODE (reg1) != word_mode)
    reg1 = gen_rtx_REG (word_mode, REGNO (reg1));

  if (REG_P (reg2) && GET_MODE (reg2) != word_mode)
    reg2 = gen_rtx_REG (word_mode, REGNO (reg2));

  return ppx_p ? gen_push2p_di (mem, reg1, reg2)
               : gen_push2_di (mem, reg1, reg2);
}
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (ix86_use_pseudo_pic_reg ())
    return INVALID_REGNUM;

  if (crtl->is_leaf
      && !df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;
      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
        drap = REGNO (crtl->drap_reg);
      else
        drap = -1;
      for (i = 2; i >= 0; --i)
        if (i != drap && !df_regs_ever_live_p (i))
          return i;
    }

  return INVALID_REGNUM;
}
/* Return true if REGNO is used by the epilogue.  */

static bool
ix86_epilogue_uses (int regno)
{
  /* If there are no caller-saved registers, we preserve all registers,
     except for MMX and x87 registers which aren't supported when saving
     and restoring registers.  Don't explicitly save SP register since
     it is always preserved.  */
  return (epilogue_completed
          && (cfun->machine->call_saved_registers
              == TYPE_NO_CALLER_SAVED_REGISTERS)
          && !fixed_regs[regno]
          && !STACK_REGNO_P (regno)
          && !MMX_REGNO_P (regno));
}
/* Return nonzero if register REGNO can be used as a scratch register
   after the epilogue.  */

static bool
ix86_hard_regno_scratch_ok (unsigned int regno)
{
  /* If there are no caller-saved registers, we can't use any register
     as a scratch register after epilogue and use REGNO as scratch
     register only if it has been used before to avoid saving and
     restoring it.  */
  return ((cfun->machine->call_saved_registers
           != TYPE_NO_CALLER_SAVED_REGISTERS)
          || (!epilogue_completed
              && df_regs_ever_live_p (regno)));
}
/* Return TRUE if we need to save REGNO.  */

bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
{
  rtx reg;

  switch (cfun->machine->call_saved_registers)
    {
    case TYPE_DEFAULT_CALL_SAVED_REGISTERS:
      break;

    case TYPE_NO_CALLER_SAVED_REGISTERS:
      /* If there are no caller-saved registers, we preserve all
         registers, except for MMX and x87 registers which aren't
         supported when saving and restoring registers.  Don't
         explicitly save SP register since it is always preserved.

         Don't preserve registers used for function return value.  */
      reg = crtl->return_rtx;
      if (reg)
        {
          unsigned int i = REGNO (reg);
          unsigned int nregs = REG_NREGS (reg);
          while (nregs-- > 0)
            if ((i + nregs) == regno)
              return false;
        }

      return (df_regs_ever_live_p (regno)
              && !fixed_regs[regno]
              && !STACK_REGNO_P (regno)
              && !MMX_REGNO_P (regno)
              && (regno != HARD_FRAME_POINTER_REGNUM
                  || !frame_pointer_needed));

    case TYPE_NO_CALLEE_SAVED_REGISTERS:
      return false;
    }

  if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && pic_offset_table_rtx)
    {
      if (ix86_use_pseudo_pic_reg ())
        {
          /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
             _mcount in prologue.  */
          if (!TARGET_64BIT && flag_pic && crtl->profile)
            return true;
        }
      else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
               || crtl->profile
               || crtl->calls_eh_return
               || crtl->uses_const_pool
               || cfun->has_nonlocal_label)
        return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
    }

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
        {
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)
            break;
          if (test == regno)
            return true;
        }
    }

  if (ignore_outlined && cfun->machine->call_ms2sysv)
    {
      unsigned count = cfun->machine->call_ms2sysv_extra_regs
                       + xlogue_layout::MIN_REGS;
      if (xlogue_layout::is_stub_managed_reg (regno, count))
        return false;
    }

  if (crtl->drap_reg
      && regno == REGNO (crtl->drap_reg)
      && !cfun->machine->no_drap_save_restore)
    return true;

  return (df_regs_ever_live_p (regno)
          && !call_used_or_fixed_reg_p (regno)
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return number of saved general purpose registers.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
      nregs++;
  return nregs;
}

/* Return number of saved SSE registers.  */

static int
ix86_nsaved_sseregs (void)
{
  int nregs = 0;
  int regno;

  if (!TARGET_64BIT_MS_ABI)
    return 0;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
      nregs++;
  return nregs;
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace argument
   pointer with hard frame pointer, or replace frame pointer with stack
   pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */

static bool
ix86_can_eliminate (const int from, const int to)
{
  if (stack_realign_fp)
    return ((from == ARG_POINTER_REGNUM
             && to == HARD_FRAME_POINTER_REGNUM)
            || (from == FRAME_POINTER_REGNUM
                && to == STACK_POINTER_REGNUM));
  else
    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
}
/* Return the offset between two registers, one to be eliminated, and the
   other its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame &frame = cfun->machine->frame;

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
           && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
        return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
/* Emits a warning for unsupported msabi to sysv pro/epilogues.  */
static void
warn_once_call_ms2sysv_xlogues (const char *feature)
{
  static bool warned_once = false;
  if (!warned_once)
    {
      warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
               feature);
      warned_once = true;
    }
}

/* Return the probing interval for -fstack-clash-protection.  */

static HOST_WIDE_INT
get_probe_interval (void)
{
  if (flag_stack_clash_protection)
    return (HOST_WIDE_INT_1U
            << param_stack_clash_protection_probe_interval);
  else
    return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
}
/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256

/* Return true if push2/pop2 can be generated.  */

static bool
ix86_can_use_push2pop2 (void)
{
  /* Use push2/pop2 only if the incoming stack is 16-byte aligned.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  return incoming_stack_boundary % 128 == 0;
}
/* Helper function to determine whether push2/pop2 can be used in prologue
   or epilogue for register save/restore.  */
static bool
ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
{
  if (!ix86_can_use_push2pop2 ())
    return false;
  int aligned = cfun->machine->fs.sp_offset % 16 == 0;
  return TARGET_APX_PUSH2POP2
         && !cfun->machine->frame.save_regs_using_mov
         && cfun->machine->func_type == TYPE_NORMAL
         && (nregs + aligned) >= 3;
}
/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (void)
{
  struct ix86_frame *frame = &cfun->machine->frame;
  struct machine_function *m = cfun->machine;
  unsigned HOST_WIDE_INT stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned HOST_WIDE_INT preferred_alignment;
  HOST_WIDE_INT size = ix86_get_frame_size ();
  HOST_WIDE_INT to_allocate;

  /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
   * ms_abi functions that call a sysv function.  We now need to prune away
   * cases where it should be disabled.  */
  if (TARGET_64BIT && m->call_ms2sysv)
    {
      gcc_assert (TARGET_64BIT_MS_ABI);
      gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
      gcc_assert (!TARGET_SEH);
      gcc_assert (TARGET_SSE);
      gcc_assert (!ix86_using_red_zone ());

      if (crtl->calls_eh_return)
        {
          gcc_assert (!reload_completed);
          m->call_ms2sysv = false;
          warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
        }

      else if (ix86_static_chain_on_stack)
        {
          gcc_assert (!reload_completed);
          m->call_ms2sysv = false;
          warn_once_call_ms2sysv_xlogues ("static call chains");
        }

      /* Finally, compute which registers the stub will manage.  */
      else
        {
          unsigned count = xlogue_layout::count_stub_managed_regs ();
          m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
          m->call_ms2sysv_pad_in = 0;
        }
    }

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();
  /* The 64-bit MS ABI seems to require stack alignment to be always 16,
     except for function prologues, leaf functions and when the default
     incoming stack boundary is overridden at the command line or via the
     force_align_arg_pointer attribute.

     Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
     at call sites, including profile function calls.

     For APX push2/pop2, the stack also requires 128b alignment.  */
  if ((ix86_pro_and_epilogue_can_use_push2pop2 (frame->nregs)
       && crtl->preferred_stack_boundary < 128)
      || (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
           && crtl->preferred_stack_boundary < 128)
          && (!crtl->is_leaf || cfun->calls_alloca != 0
              || ix86_current_function_calls_tls_descriptor
              || (TARGET_MACHO && crtl->profile)
              || ix86_incoming_stack_boundary < 128)))
    {
      crtl->preferred_stack_boundary = 128;
      if (crtl->stack_alignment_needed < 128)
        crtl->stack_alignment_needed = 128;
    }

  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* The only ABI saving SSE regs should be 64-bit ms_abi.  */
  gcc_assert (TARGET_64BIT || !frame->nsseregs);
  if (TARGET_64BIT && m->call_ms2sysv)
    {
      gcc_assert (stack_alignment_needed >= 16);
      gcc_assert (!frame->nsseregs);
    }
6921 /* For SEH we have to limit the amount of code movement into the prologue.
6922 At present we do this via a BLOCKAGE, at which point there's very little
6923 scheduling that can be done, which means that there's very little point
6924 in doing anything except PUSHs. */
6926 m
->use_fast_prologue_epilogue
= false;
  else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_node::get (current_function_decl);

      /* The fast prologue uses move instead of push to save registers.  This
	 is significantly longer, but also executes faster as modern hardware
	 can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit: when a function
	 takes many instructions to execute we may use the slow version, as
	 well as in case the function is known to be outside a hot spot (this
	 is known with feedback only).  Weight the size of the function by
	 the number of registers to save, as it is cheap to use one or two
	 push instructions but very slow to use many of them.

	 Calling this hook multiple times with the same frame requirements
	 must produce the same layout, since the RA might otherwise be
	 unable to reach a fixed point or might fail its final sanity checks.
	 This means that once we've assumed that a function does or doesn't
	 have a particular size, we have to stick to that assumption
	 regardless of how the function has changed since.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node)
	{
	  if (node->frequency < NODE_FREQUENCY_NORMAL
	      || (flag_branch_probabilities
		  && node->frequency < NODE_FREQUENCY_HOT))
	    m->use_fast_prologue_epilogue = false;
	  else
	    {
	      if (count != frame->expensive_count)
		{
		  frame->expensive_count = count;
		  frame->expensive_p = expensive_function_p (count);
		}
	      m->use_fast_prologue_epilogue = !frame->expensive_p;
	    }
	}
    }

  frame->save_regs_using_mov
    = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
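  /* A worked example of the weighting above (hypothetical numbers): with
     frame->nregs == 3 and FAST_PROLOGUE_INSN_COUNT == 20, count becomes
     (3 - 1) * 20 == 40, so the move-based fast prologue is only used if
     expensive_function_p (40) decides the function body is cheap relative
     to that 40-insn budget.  */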
  /* Skip return address and error code in exception handler.  */
  offset = INCOMING_FRAME_SP_OFFSET;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* Calculate the size of the va-arg area (not including padding, if any).  */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
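  /* An illustrative 64-bit layout, assuming INCOMING_FRAME_SP_OFFSET is one
     word (8 bytes), no static chain on the stack, a saved frame pointer and
     frame->nregs == 3:

	offset = 8		after the return address
	offset = 16		after saved %rbp; hfp_save_offset == 16
	offset = 16 + 3*8 = 40	reg_save_offset == 40

     These numbers are hypothetical; the real values depend on the ABI and
     on which registers actually need saving.  */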
  /* Also adjust stack_realign_offset for the largest alignment of
     stack slot actually used.  */
  if (stack_realign_fp
      || (cfun->machine->max_used_stack_alignment != 0
	  && (offset % cfun->machine->max_used_stack_alignment) != 0))
    {
      /* We may need a 16-byte aligned stack for the remainder of the
	 register save area, but the stack frame for the local function
	 may require a greater alignment if using AVX/2/512.  In order
	 to avoid wasting space, we first calculate the space needed for
	 the rest of the register saves, add that to the stack pointer,
	 and then realign the stack to the boundary of the start of the
	 frame for the local function.  */
      HOST_WIDE_INT space_needed = 0;
      HOST_WIDE_INT sse_reg_space_needed = 0;

      if (TARGET_64BIT)
	{
	  if (m->call_ms2sysv)
	    {
	      m->call_ms2sysv_pad_in = 0;
	      space_needed
		= xlogue_layout::get_instance ().get_stack_space_used ();
	    }
	  else if (frame->nsseregs)
	    /* The only ABI that has saved SSE registers (Win64) also has a
	       16-byte aligned default stack.  However, many programs violate
	       the ABI, and Wine64 forces stack realignment to compensate.  */
	    space_needed = frame->nsseregs * 16;

	  sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);

	  /* 64-bit frame->va_arg_size should always be a multiple of 16, but
	     rounding to be pedantic.  */
	  space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
	}
      else
	space_needed = frame->va_arg_size;

      /* Record the allocation size required prior to the realignment AND.  */
      frame->stack_realign_allocate = space_needed;

      /* The re-aligned stack starts at frame->stack_realign_offset.  Values
	 before this point are not directly comparable with values below
	 this point.  Use sp_valid_at to determine if the stack pointer is
	 valid for a given offset, fp_valid_at for the frame pointer, or
	 choose_baseaddr to have a base register chosen for you.

	 Note that the result of (frame->stack_realign_offset
	 & (stack_alignment_needed - 1)) may not equal zero.  */
      offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
      frame->stack_realign_offset = offset - space_needed;
      frame->sse_reg_save_offset = frame->stack_realign_offset
				   + sse_reg_space_needed;
    }
  else
    {
      frame->stack_realign_offset = offset;
      if (TARGET_64BIT && m->call_ms2sysv)
	{
	  m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
	  offset += xlogue_layout::get_instance ().get_stack_space_used ();
	}
      /* Align and set SSE register save area.  */
      else if (frame->nsseregs)
	{
	  /* If the incoming stack boundary is at least 16 bytes, or DRAP is
	     required and the DRAP re-alignment boundary is at least 16 bytes,
	     then we want the SSE register save area properly aligned.  */
	  if (ix86_incoming_stack_boundary >= 128
	      || (stack_realign_drap && stack_alignment_needed >= 16))
	    offset = ROUND_UP (offset, 16);
	  offset += frame->nsseregs * 16;
	}
      frame->sse_reg_save_offset = offset;
      offset += frame->va_arg_size;
    }
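  /* ROUND_UP (x, a) rounds X up to the next multiple of A; for the
     power-of-two alignments used throughout this function it is equivalent
     to (x + a - 1) & -a.  For example, ROUND_UP (40, 16) == 48, so a
     40-byte offset gets 8 bytes of padding before a 16-byte-aligned SSE
     save area.  (Illustrative values.)  */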
  /* Align start of frame for local function.  When a function call
     is removed, it may become a leaf function.  But if arguments may
     be passed on the stack, we need to align the stack when there is
     no tail call.  */
  if (m->call_ms2sysv
      || frame->va_arg_size != 0
      || size != 0
      || !crtl->is_leaf
      || (!crtl->tail_call_emit
	  && cfun->machine->outgoing_args_on_stack)
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = ROUND_UP (offset, stack_alignment_needed);

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;
  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!crtl->is_leaf || cfun->calls_alloca
	  || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;
  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!crtl->is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = ROUND_UP (offset, preferred_alignment);

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;
  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
      /* If static stack checking is enabled and done with probes,
	 the registers need to be saved before allocating the frame.  */
      || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
      /* If stack clash probing needs a loop, then it needs a
	 scratch register.  But the returned register is only guaranteed
	 to be safe to use after register saves are complete.  So if
	 stack clash protections are enabled and the allocated frame is
	 larger than the probe interval, then use pushes to save
	 callee saved registers.  */
      || (flag_stack_clash_protection
	  && !ix86_target_stack_probe ()
	  && to_allocate > get_probe_interval ()))
    frame->save_regs_using_mov = false;
  if (ix86_using_red_zone ()
      && crtl->sp_is_unchanging
      && crtl->is_leaf
      && !ix86_pc_thunk_call_expanded
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;
  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      /* Force the frame pointer to point at or below the lowest register save
	 area, see the SEH code in config/i386/winnt.cc for the rationale.  */
      frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;

      /* If we can leave the frame pointer where it is, do so; however return
	 the establisher frame for __builtin_frame_address (0) or else if the
	 frame overflows the SEH maximum frame size.

	 Note that the value returned by __builtin_frame_address (0) is quite
	 constrained, because setjmp is piggybacked on the SEH machinery with
	 recent versions of MinGW:

	  # elif defined(__SEH__)
	  #  if defined(__aarch64__) || defined(_ARM64_)
	  #   define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
	  #  elif (__MINGW_GCC_VERSION < 40702)
	  #   define setjmp(BUF) _setjmp((BUF), mingw_getsp())
	  #  else
	  #   define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
	  #  endif

	 and the second argument passed to _setjmp, if not null, is forwarded
	 to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
	 built an ExceptionRecord on the fly describing the setjmp buffer).  */
      const HOST_WIDE_INT diff
	= frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff <= 255 && !crtl->accesses_prior_frames)
	{
	  /* The resulting diff will be a multiple of 16 lower than 255,
	     i.e. at most 240 as required by the unwind data structure.  */
	  frame->hard_frame_pointer_offset += (diff & 15);
	}
      else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
	{
	  /* Ideally we'd determine what portion of the local stack frame
	     (within the constraint of the lowest 240) is most heavily used.
	     But without that complication, simply bias the frame pointer
	     by 128 bytes so as to maximize the amount of the local stack
	     frame that is addressable with 8-bit offsets.  */
	  frame->hard_frame_pointer_offset
	    = frame->stack_pointer_offset - 128;
	}
      else
	frame->hard_frame_pointer_offset = frame->hfp_save_offset;
    }
}
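/* A worked example of the 8-bit-offset case above (hypothetical numbers):
   if diff == 203, then diff & 15 == 11, so the frame pointer is biased by
   11 bytes, leaving a distance of 192 bytes -- a multiple of 16 that does
   not exceed the 240-byte limit imposed by the SEH unwind data.  */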
/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.  */

static int
choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
{
  int len = 4;

  if (offset == 0)
    {
      /* EBP and R13 cannot be encoded without an offset.  */
      len = (regno == BP_REG || regno == R13_REG);
    }
  else if (IN_RANGE (offset, -128, 127))
    len = 1;

  /* ESP and R12 must be encoded with a SIB byte.  */
  if (regno == SP_REG || regno == R12_REG)
    len++;

  return len;
}
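/* Illustrative encodings: (%rbx) needs no displacement byte, so
   choose_baseaddr_len (BX_REG, 0) == 0; (%rbp) requires a disp8 even for
   offset 0, giving 1; -8(%rsp) costs a disp8 plus the mandatory SIB byte,
   giving 2; and 1024(%r13) needs a full disp32, giving 4.  */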
/* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
   the frame save area.  The register is saved at CFA - CFA_OFFSET.  */

static bool
sp_valid_at (HOST_WIDE_INT cfa_offset)
{
  const struct machine_frame_state &fs = cfun->machine->fs;
  if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
    {
      /* Validate that the cfa_offset isn't in a "no-man's land".  */
      gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
      return false;
    }
  return fs.sp_valid;
}

/* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
   the frame save area.  The register is saved at CFA - CFA_OFFSET.  */

static bool
fp_valid_at (HOST_WIDE_INT cfa_offset)
{
  const struct machine_frame_state &fs = cfun->machine->fs;
  if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
    {
      /* Validate that the cfa_offset isn't in a "no-man's land".  */
      gcc_assert (cfa_offset >= fs.sp_realigned_offset);
      return false;
    }
  return fs.fp_valid;
}
/* Choose a base register based upon alignment requested, speed and/or
   size.  */

static void
choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
		HOST_WIDE_INT &base_offset,
		unsigned int align_requested, unsigned int *align)
{
  const struct machine_function *m = cfun->machine;
  unsigned int hfp_align;
  unsigned int drap_align;
  unsigned int sp_align;
  bool hfp_ok  = fp_valid_at (cfa_offset);
  bool drap_ok = m->fs.drap_valid;
  bool sp_ok   = sp_valid_at (cfa_offset);

  hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;

  /* Filter out any registers that don't meet the requested alignment
     criteria.  */
  if (align_requested)
    {
      if (m->fs.realigned)
	hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
      /* SEH unwind code does not currently support REG_CFA_EXPRESSION
	 notes (which we would need to use a realigned stack pointer),
	 so disable on SEH targets.  */
      else if (m->fs.sp_realigned)
	sp_align = crtl->stack_alignment_needed;

      hfp_ok = hfp_ok && hfp_align >= align_requested;
      drap_ok = drap_ok && drap_align >= align_requested;
      sp_ok = sp_ok && sp_align >= align_requested;
    }

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
	 opportunities.  Generally FP is valid throughout the function,
	 while DRAP must be reloaded within the epilogue.  But choose either
	 over the SP due to increased encoding size.  */
      if (hfp_ok)
	{
	  base_reg = hard_frame_pointer_rtx;
	  base_offset = m->fs.fp_offset - cfa_offset;
	}
      else if (drap_ok)
	{
	  base_reg = crtl->drap_reg;
	  base_offset = 0 - cfa_offset;
	}
      else if (sp_ok)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	}
    }
  else
    {
      HOST_WIDE_INT toffset;
      int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
	 With a tie, choose FP > DRAP > SP.  */
      if (sp_ok)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	  len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
	}
      if (drap_ok)
	{
	  toffset = 0 - cfa_offset;
	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
	  if (tlen <= len)
	    {
	      base_reg = crtl->drap_reg;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
      if (hfp_ok)
	{
	  toffset = m->fs.fp_offset - cfa_offset;
	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
	  if (tlen <= len)
	    {
	      base_reg = hard_frame_pointer_rtx;
	      base_offset = toffset;
	    }
	}
    }

  /* Set the align return value.  */
  if (align)
    {
      if (base_reg == stack_pointer_rtx)
	*align = sp_align;
      else if (base_reg == crtl->drap_reg)
	*align = drap_align;
      else if (base_reg == hard_frame_pointer_rtx)
	*align = hfp_align;
    }
}
/* Return an RTX that points to CFA_OFFSET within the stack frame and
   the alignment of address.  If ALIGN is non-null, it should point to
   an alignment value (in bits) that is preferred or zero and will
   receive the alignment of the base register that was selected,
   irrespective of whether or not CFA_OFFSET is a multiple of that
   alignment value.  If it is possible for the base register offset to be
   non-immediate then SCRATCH_REGNO should specify a scratch register to
   use.

   The valid base registers are taken from CFUN->MACHINE->FS.  */

static rtx
choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
		 unsigned int scratch_regno = INVALID_REGNUM)
{
  rtx base_reg = NULL;
  HOST_WIDE_INT base_offset = 0;

  /* If a specific alignment is requested, try to get a base register
     with that alignment first.  */
  if (align && *align)
    choose_basereg (cfa_offset, base_reg, base_offset, *align, align);

  if (!base_reg)
    choose_basereg (cfa_offset, base_reg, base_offset, 0, align);

  gcc_assert (base_reg != NULL);

  rtx base_offset_rtx = GEN_INT (base_offset);

  if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
    {
      gcc_assert (scratch_regno != INVALID_REGNUM);

      rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      emit_move_insn (scratch_reg, base_offset_rtx);

      return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
    }

  return plus_constant (Pmode, base_reg, base_offset);
}
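/* A minimal usage sketch (the offset and alignment are hypothetical): to
   address a slot saved at CFA - 32 with at least 128-bit alignment, a
   caller would do

       unsigned int align = 128;
       rtx addr = choose_baseaddr (32, &align);

   and then inspect ALIGN for the alignment the chosen base register
   actually guarantees; if no suitably aligned base exists, the second
   choose_basereg call above falls back to the best unaligned choice.  */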
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  int regno;
  rtx_insn *insn;

  if (!TARGET_APX_PUSH2POP2
      || !ix86_can_use_push2pop2 ()
      || cfun->machine->func_type != TYPE_NORMAL)
    {
      for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
	if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
	  {
	    insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
					TARGET_APX_PPX));
	    RTX_FRAME_RELATED_P (insn) = 1;
	  }
    }
  else
    {
      int regno_list[2];
      regno_list[0] = regno_list[1] = -1;
      int loaded_regnum = 0;
      bool aligned = cfun->machine->fs.sp_offset % 16 == 0;

      for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
	if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
	  {
	    if (aligned)
	      {
		regno_list[loaded_regnum++] = regno;
		if (loaded_regnum == 2)
		  {
		    gcc_assert (regno_list[0] != -1
				&& regno_list[1] != -1
				&& regno_list[0] != regno_list[1]);
		    const int offset = UNITS_PER_WORD * 2;
		    rtx mem = gen_rtx_MEM (TImode,
					   gen_rtx_PRE_DEC (Pmode,
							    stack_pointer_rtx));
		    insn = emit_insn (gen_push2 (mem,
						 gen_rtx_REG (word_mode,
							      regno_list[0]),
						 gen_rtx_REG (word_mode,
							      regno_list[1]),
						 TARGET_APX_PPX));
		    RTX_FRAME_RELATED_P (insn) = 1;
		    rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));

		    for (int i = 0; i < 2; i++)
		      {
			rtx dwarf_reg = gen_rtx_REG (word_mode,
						     regno_list[i]);
			rtx sp_offset = plus_constant (Pmode,
						       stack_pointer_rtx,
						       UNITS_PER_WORD
						       * (1 - i));
			rtx tmp = gen_rtx_SET (gen_frame_mem (DImode,
							      sp_offset),
					       dwarf_reg);
			RTX_FRAME_RELATED_P (tmp) = 1;
			XVECEXP (dwarf, 0, i + 1) = tmp;
		      }
		    rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx,
					      plus_constant (Pmode,
							     stack_pointer_rtx,
							     -offset));
		    RTX_FRAME_RELATED_P (sp_tmp) = 1;
		    XVECEXP (dwarf, 0, 0) = sp_tmp;
		    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);

		    regno_list[0] = regno_list[1] = -1;
		    loaded_regnum = 0;
		  }
	      }
	    else
	      {
		insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
					    TARGET_APX_PPX));
		RTX_FRAME_RELATED_P (insn) = 1;
		aligned = true;
	      }
	  }

      if (loaded_regnum == 1)
	{
	  insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
						   regno_list[0]),
				      TARGET_APX_PPX));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
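/* The REG_FRAME_RELATED_EXPR built above for a push2 of REG1/REG2 is,
   schematically:

       (set sp (plus sp (const_int -16)))
       (set (mem (plus sp (const_int 8))) reg1)
       (set (mem sp) reg2)

   i.e. one 16-byte stack adjustment plus the two word-sized stores.  The
   unwinder needs this expansion because it does not understand push2
   itself.  (Shape shown for illustration; modes elided.)  */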
/* Emit a single register save at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
			      HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  addr = choose_baseaddr (cfa_offset, &align);
  mem = gen_frame_mem (mode, addr);

  /* The location alignment depends upon the base register.  */
  align = MIN (GET_MODE_ALIGNMENT (mode), align);
  gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
  set_mem_align (mem, align);

  insn = emit_insn (gen_rtx_SET (mem, reg));
  RTX_FRAME_RELATED_P (insn) = 1;

  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
	{
	  /* A bit of a hack.  We force the DRAP register to be saved in
	     the re-aligned stack frame, which provides us with a copy
	     of the CFA that will last past the prologue.  Install it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
	}
      else
	{
	  /* The frame pointer is a stable reference within the
	     aligned frame.  Use it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
	}
    }

  else if (base == stack_pointer_rtx && m->fs.sp_realigned
	   && cfa_offset >= m->fs.sp_realigned_offset)
    {
      gcc_checking_assert (stack_realign_fp);
      add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (Pmode, m->fs.cfa_reg,
			    m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
    }
}
/* Emit code to save registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
      {
	ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
	cfa_offset -= UNITS_PER_WORD;
      }
}

/* Emit code to save SSE registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
      {
	ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
	cfa_offset -= GET_MODE_SIZE (V4SFmode);
      }
}
static GTY(()) rtx queued_cfa_restores;

/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
   manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within stack red-zone till return, as unwinders can find the same value
   in the register and on the stack.  */

static void
ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
  if (!crtl->shrink_wrapped
      && cfa_offset <= cfun->machine->fs.red_zone_offset)
    return;

  if (insn)
    {
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    queued_cfa_restores
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
}

/* Add queued REG_CFA_RESTORE notes if any to INSN.  */

static void
ix86_add_queued_cfa_restore_notes (rtx insn)
{
  rtx last;
  if (!queued_cfa_restores)
    return;
  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ;
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame
   related, zero if the %r11 register is live and cannot be freely used,
   and positive otherwise.  */

static rtx_insn *
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
			   int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx addend = offset;
  rtx_insn *insn;
  bool add_frame_related_expr = false;

  if (!x86_64_immediate_operand (offset, Pmode))
    {
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  */
      if (style)
	addend = gen_rtx_REG (Pmode, R11_REG);
      else
	{
	  gcc_assert (src != hard_frame_pointer_rtx
		      && dest != hard_frame_pointer_rtx);
	  addend = hard_frame_pointer_rtx;
	}
      emit_insn (gen_rtx_SET (addend, offset));
      if (style < 0)
	add_frame_related_expr = true;
    }

  insn = emit_insn (gen_pro_epilogue_adjust_stack_add
		    (Pmode, dest, src, addend));
  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
	{
	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
	  r = gen_rtx_SET (dest, r);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
	}
    }

  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;
      bool realigned = m->fs.sp_realigned;

      if (src == hard_frame_pointer_rtx)
	{
	  valid = m->fs.fp_valid;
	  realigned = false;
	  ooffset = m->fs.fp_offset;
	}
      else if (src == crtl->drap_reg)
	{
	  valid = m->fs.drap_valid;
	  realigned = false;
	  ooffset = 0;
	}
      else
	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which
	     is taken care of by hand along the eh_return path.  */
	  gcc_checking_assert (src == stack_pointer_rtx
			       || offset == const0_rtx);
	}

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
      m->fs.sp_realigned = realigned;
    }
  return insn;
}
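/* A typical prologue-side use (cf. the probe code further down; the
   constant is illustrative):

       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				  GEN_INT (-16), -1,
				  m->fs.cfa_reg == stack_pointer_rtx);

   allocates 16 bytes, marks the insn frame-related (STYLE == -1) and
   updates the tracked CFA only if the CFA currently lives in the stack
   pointer.  */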
/* Find an available register to be used as a dynamic realign argument
   pointer register.  Such a register will be written in the prologue and
   used at the beginning of the body, so it must not be
	1. a parameter passing register.
	2. the GOT pointer.
   We reuse the static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has a
   shorter encoding.

   Return: the regno of the chosen register.  */

static unsigned int
find_drap_reg (void)
{
  tree decl = cfun->decl;

  /* Always use a callee-saved register if there are no caller-saved
     registers.  */
  if (TARGET_64BIT)
    {
      /* Use R13 for a nested function or a function that needs a static
	 chain.  Since a function with a tail call may use any caller-saved
	 registers in the epilogue, DRAP must not use a caller-saved
	 register in such a case.  */
      if (DECL_STATIC_CHAIN (decl)
	  || (cfun->machine->call_saved_registers
	      == TYPE_NO_CALLER_SAVED_REGISTERS)
	  || crtl->tail_call_emit)
	return R13_REG;

      return R10_REG;
    }
  else
    {
      /* Use DI for a nested function or a function that needs a static
	 chain.  Since a function with a tail call may use any caller-saved
	 registers in the epilogue, DRAP must not use a caller-saved
	 register in such a case.  */
      if (DECL_STATIC_CHAIN (decl)
	  || (cfun->machine->call_saved_registers
	      == TYPE_NO_CALLER_SAVED_REGISTERS)
	  || crtl->tail_call_emit
	  || crtl->calls_eh_return)
	return DI_REG;

      /* Reuse the static chain register if it isn't used for parameter
	 passing.  */
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
	    return CX_REG;
	}
      return DI_REG;
    }
}
/* Return minimum incoming stack alignment.  */

static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Stack of interrupt handler is aligned to 128 bits in 64bit mode.  */
  if (cfun->machine->func_type != TYPE_NORMAL)
    incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
  /* Prefer the one specified at command line.  */
  else if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
     if -mstackrealign is used: it isn't needed for the sibcall check, and
     the estimated stack alignment is 128 bits.  */
  else if (!sibcall
	   && ix86_force_align_arg_pointer
	   && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute ("force_align_arg_pointer",
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* Stack at entrance of main is aligned by runtime.  We use the
     smallest incoming stack boundary.  */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}
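/* For example (illustrative): a 64-bit interrupt handler gets 128 bits; a
   32-bit function built with -mstackrealign whose estimated alignment is
   already 128 bits gets MIN_STACK_BOUNDARY; and main () is never required
   to assume more than MAIN_STACK_BOUNDARY, whatever the default would be,
   since the runtime aligns its entry stack.  */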
/* Update incoming stack boundary and estimated stack alignment.  */

static void
ix86_update_stack_boundary (void)
{
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (false);

  /* x86_64 vararg needs 16byte stack alignment for register save area.  */
  if (TARGET_64BIT
      && cfun->stdarg
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;

  /* __tls_get_addr needs to be called with 16-byte aligned stack.  */
  if (ix86_tls_descriptor_calls_expanded_in_cfun
      && crtl->preferred_stack_boundary < 128)
    crtl->preferred_stack_boundary = 128;
}
/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  /* We must use DRAP if there are outgoing arguments on stack or
     the stack pointer register is clobbered by an asm statement and
     ACCUMULATE_OUTGOING_ARGS is false.  */
  if (ix86_force_drap
      || ((cfun->machine->outgoing_args_on_stack
	   || crtl->sp_is_clobbered_by_asm)
	  && !ACCUMULATE_OUTGOING_ARGS))
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and return vDRAP.  */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx_insn *seq, *insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = get_insns ();
      end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
	{
	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      return drap_vreg;
    }
  else
    return NULL;
}
/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}

struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool thiscall_p
	= lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      unsigned int drap_regno
	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
	 for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
	  && drap_regno != AX_REG)
	regno = AX_REG;
      /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
	 for the static chain register.  */
      else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
	regno = AX_REG;
      else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
	regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !thiscall_p
	       && !static_chain_p
	       && drap_regno != CX_REG)
	regno = CX_REG;
      else if (ix86_save_reg (BX_REG, true, false))
	regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
	       && ix86_save_reg (SI_REG, true, false))
	regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true, false))
	regno = DI_REG;
      else
	{
	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
	  sr->saved = true;
	}
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx_insn *insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
/* Release a scratch register obtained from the preceding function.

   If RELEASE_VIA_POP is true, we just pop the register off the stack
   to release it.  This is what non-Linux systems use with -fstack-check.

   Otherwise we use OFFSET to locate the saved register and the
   allocated stack space becomes part of the local frame and is
   deallocated by the epilogue.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
				   bool release_via_pop)
{
  if (sr->saved)
    {
      if (release_via_pop)
	{
	  struct machine_function *m = cfun->machine;
	  rtx x, insn = emit_insn (gen_pop (sr->reg));

	  /* The RTX FRAME_RELATED_P mechanism doesn't know about pop.  */
	  RTX_FRAME_RELATED_P (insn) = 1;
	  x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
	  m->fs.sp_offset -= UNITS_PER_WORD;
	}
      else
	{
	  rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
	  emit_insn (x);
	}
    }
}
/* Emit code to adjust the stack pointer by SIZE bytes while probing it.

   If INT_REGISTERS_SAVED is true, then integer registers have already been
   pushed on the stack.

   If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope
   beyond SIZE bytes.

   This assumes no knowledge of the current probing state, i.e. it is never
   allowed to allocate more than PROBE_INTERVAL bytes of stack space without
   a suitable probe.  */

static void
ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
			     const bool int_registers_saved,
			     const bool protection_area)
{
  struct machine_function *m = cfun->machine;

  /* If this function does not statically allocate stack space, then
     no probes are needed.  */
  if (!size)
    {
      /* However, the allocation of space via pushes for register
	 saves could be viewed as allocating space, but without the
	 need to probe.  */
      if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      else
	dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
      return;
    }

  /* If we are a noreturn function, then we have to consider the
     possibility that we're called via a jump rather than a call.

     Thus we don't have the implicit probe generated by saving the
     return address into the stack at the call.  Thus, the stack
     pointer could be anywhere in the guard page.  The safe thing
     to do is emit a probe now.

     The probe can be avoided if we have already emitted any callee
     register saves into the stack or have a frame pointer (which will
     have been saved as well).  Those saves will function as implicit
     probes.

     ?!? This should be revamped to work like aarch64 and s390 where
     we track the offset from the most recent probe.  Normally that
     offset would be zero.  For a noreturn function we would reset
     it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT).  Then
     we just probe when we cross PROBE_INTERVAL.  */
  if (TREE_THIS_VOLATILE (cfun->decl)
      && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
    {
      /* We can safely use any register here since we're just going to push
	 its value and immediately pop it back.  But we do try and avoid
	 argument passing registers so as not to introduce dependencies in
	 the pipeline.  For 32 bit we use %esi and for 64 bit we use %rax.  */
      rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
      rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
      rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
      m->fs.sp_offset -= UNITS_PER_WORD;
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
	  RTX_FRAME_RELATED_P (insn_push) = 1;
	  x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
	  RTX_FRAME_RELATED_P (insn_pop) = 1;
	}
      emit_insn (gen_blockage ());
    }
  const HOST_WIDE_INT probe_interval = get_probe_interval ();
  const int dope = 4 * UNITS_PER_WORD;

  /* If there is protection area, take it into account in the size.  */
  if (protection_area)
    size += probe_interval + dope;

  /* If we allocate less than the size of the guard statically,
     then no probing is necessary, but we do need to allocate
     the stack.  */
  else if (size < (1 << param_stack_clash_protection_guard_size))
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-size), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
      dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      return;
    }

  /* We're allocating a large enough stack frame that we need to
     emit probes.  Either emit them inline or in a loop depending
     on the size.  */
  if (size <= 4 * probe_interval)
    {
      HOST_WIDE_INT i;
      for (i = probe_interval; i <= size; i += probe_interval)
	{
	  /* Allocate PROBE_INTERVAL bytes.  */
	  rtx_insn *insn
	    = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
					 GEN_INT (-probe_interval), -1,
					 m->fs.cfa_reg == stack_pointer_rtx);
	  add_reg_note (insn, REG_STACK_CHECK, const0_rtx);

	  /* And probe at *sp.  */
	  emit_stack_probe (stack_pointer_rtx);
	  emit_insn (gen_blockage ());
	}

      /* We need to allocate space for the residual, but we do not need
	 to probe the residual...  */
      HOST_WIDE_INT residual = (i - probe_interval - size);
      if (residual)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (residual), -1,
				     m->fs.cfa_reg == stack_pointer_rtx);

	  /* ...except if there is a protection area to maintain.  */
	  if (protection_area)
	    emit_stack_probe (stack_pointer_rtx);
	}

      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
    }
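  /* Schematically, with a 4096-byte probe interval and SIZE == 10000, the
     inline case above emits something like (AT&T syntax, illustrative):

	sub	$4096, %rsp
	orl	$0, (%rsp)
	sub	$4096, %rsp
	orl	$0, (%rsp)
	sub	$1808, %rsp

     i.e. two probed interval-sized allocations followed by an unprobed
     1808-byte residual.  */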
  else
    {
      /* We expect the GP registers to be saved when probes are used
	 as the probing sequences might need a scratch register and
	 the routine to allocate one assumes the integer registers
	 have already been saved.  */
      gcc_assert (int_registers_saved);

      struct scratch_reg sr;
      get_scratch_register_on_entry (&sr);

      /* If we needed to save a register, then account for any space
	 that was pushed (we are not going to pop the register when
	 we do the restore).  */
      if (sr.saved)
	size -= UNITS_PER_WORD;

      /* Step 1: round SIZE down to a multiple of the interval.  */
      HOST_WIDE_INT rounded_size = size & -probe_interval;

      /* Step 2: compute final value of the loop counter.  Use lea if
	 possible.  */
      rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
      rtx_insn *insn;
      if (address_no_seg_operand (addr, Pmode))
	insn = emit_insn (gen_rtx_SET (sr.reg, addr));
      else
	{
	  emit_move_insn (sr.reg, GEN_INT (-rounded_size));
	  insn = emit_insn (gen_rtx_SET (sr.reg,
					 gen_rtx_PLUS (Pmode, sr.reg,
						       stack_pointer_rtx)));
	}
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, sr.reg,
				       m->fs.cfa_offset + rounded_size));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Step 3: the loop.  */
      rtx size_rtx = GEN_INT (rounded_size);
      insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
						    size_rtx));
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  m->fs.cfa_offset += rounded_size;
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      m->fs.sp_offset += rounded_size;
      emit_insn (gen_blockage ());

      /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
	 is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (rounded_size - size), -1,
				     m->fs.cfa_reg == stack_pointer_rtx);

	  if (protection_area)
	    emit_stack_probe (stack_pointer_rtx);
	}

      dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);

      /* This does not deallocate the space reserved for the scratch
	 register.  That will be deallocated in the epilogue.  */
      release_scratch_register_on_entry (&sr, size, false);
    }

  /* Adjust back to account for the protection area.  */
  if (protection_area)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (probe_interval + dope), -1,
			       m->fs.cfa_reg == stack_pointer_rtx);

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
/* Adjust the stack pointer up to REG while probing it.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* SP = SP - PROBE_INTERVAL.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = GEN_INT (get_probe_interval ());
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  /* Test if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);

  /* Branch.  */
  fputs ("\tjne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
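/* With a 4096-byte interval the loop printed above looks like (AT&T
   syntax; %rax stands in for REG and is illustrative):

   .LPSRL0:
	subq	$4096, %rsp
	orq	$0, (%rsp)
	cmpq	%rax, %rsp
	jne	.LPSRL0
 */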
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.

   INT_REGISTERS_SAVED is true if integer registers have already been
   pushed on the stack.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
			     const bool int_registers_saved)
{
  const HOST_WIDE_INT probe_interval = get_probe_interval ();

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 6 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 6 * probe_interval)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
	 it exceeds SIZE.  If only one probe is needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = probe_interval; i < size; i += probe_interval)
	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
					 -(first + i)));

      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
				       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      /* We expect the GP registers to be saved when probes are used
	 as the probing sequences might need a scratch register and
	 the routine to allocate one assumes the integer registers
	 have already been saved.  */
      gcc_assert (int_registers_saved);

      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = ROUND_DOWN (size, probe_interval);


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn
	(gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode,
					 gen_rtx_PLUS (Pmode,
						       stack_pointer_rtx,
						       sr.reg),
					 rounded_size - size));

      release_scratch_register_on_entry (&sr, size, true);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg;
  xops[1] = GEN_INT (get_probe_interval ());
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);

  /* Branch.  */
  fputs ("\tjne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
/* Set stack_frame_required to false if stack frame isn't required.
   Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
   slot used if stack frame is required and CHECK_STACK_SLOT is true.  */

static void
ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
				    bool check_stack_slot)
{
  HARD_REG_SET set_up_by_prologue, prologue_used;
  basic_block bb;

  CLEAR_HARD_REG_SET (prologue_used);
  CLEAR_HARD_REG_SET (set_up_by_prologue);
  add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
  add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
  add_to_hard_reg_set (&set_up_by_prologue, Pmode,
		       HARD_FRAME_POINTER_REGNUM);

  /* The preferred stack alignment is the minimum stack alignment.  */
  if (stack_alignment > crtl->preferred_stack_boundary)
    stack_alignment = crtl->preferred_stack_boundary;

  bool require_stack_frame = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn)
	    && requires_stack_frame_p (insn, prologue_used,
				       set_up_by_prologue))
	  {
	    require_stack_frame = true;

	    if (check_stack_slot)
	      {
		/* Find the maximum stack alignment.  */
		subrtx_iterator::array_type array;
		FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
		  if (MEM_P (*iter)
		      && (reg_mentioned_p (stack_pointer_rtx,
					   *iter)
			  || reg_mentioned_p (frame_pointer_rtx,
					      *iter)))
		    {
		      unsigned int alignment = MEM_ALIGN (*iter);
		      if (alignment > stack_alignment)
			stack_alignment = alignment;
		    }
	      }
	  }
    }

  cfun->machine->stack_frame_required = require_stack_frame;
}
/* Finalize the stack_realign_needed and frame_pointer_needed flags, which
   will guide the prologue/epilogue to be generated in correct form.  */

static void
ix86_finalize_stack_frame_flags (void)
{
  /* Check if stack realignment is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_alignment
    = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
       ? crtl->max_used_stack_slot_alignment
       : crtl->stack_alignment_needed);
  unsigned int stack_realign
    = (incoming_stack_boundary < stack_alignment);
  bool recompute_frame_layout_p = false;

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
	 change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* It is always safe to compute max_used_stack_alignment.  We
     compute it only if 128-bit aligned load/store may be generated
     on misaligned stack slot which will lead to segfault.  */
  bool check_stack_slot
    = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
  ix86_find_max_used_stack_alignment (stack_alignment,
				      check_stack_slot);
  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed or -fno-omit-frame-pointer
     is used, but in the end nothing that needed the stack alignment has
     been spilled and there is no stack access, clear frame_pointer_needed
     and say we don't need stack realignment.

     When a vector register is used for a piecewise move and store, we don't
     increase stack_alignment_needed as there is no register spill for
     piecewise move and store.  Since stack_realign_needed is set to true
     by checking stack_alignment_estimated, which is updated by pseudo
     vector register usage, we also need to check stack_realign_needed to
     eliminate the frame pointer.  */
  if ((stack_realign
       || (!flag_omit_frame_pointer && optimize)
       || crtl->stack_realign_needed)
      && frame_pointer_needed
      && crtl->is_leaf
      && crtl->sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      /* See ira_setup_eliminable_regset for the rationale.  */
      && !(STACK_CHECK_MOVING_SP
	   && flag_stack_check
	   && flag_exceptions
	   && cfun->can_throw_non_call_exceptions)
      && !ix86_frame_pointer_required ()
      && ix86_get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      if (cfun->machine->stack_frame_required)
	{
	  /* Stack frame is required.  If stack alignment needed is less
	     than incoming stack boundary, don't realign stack.  */
	  stack_realign = incoming_stack_boundary < stack_alignment;
	  if (!stack_realign)
	    {
	      crtl->max_used_stack_slot_alignment
		= incoming_stack_boundary;
	      crtl->stack_alignment_needed
		= incoming_stack_boundary;
	      /* Also update preferred_stack_boundary for leaf
		 functions.  */
	      crtl->preferred_stack_boundary
		= incoming_stack_boundary;
	    }
	}
      else
	{
	  /* If drap has been set, but it actually isn't live at the
	     start of the function, there is no reason to set it up.  */
	  if (crtl->drap_reg)
	    {
	      basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
	      if (! REGNO_REG_SET_P (DF_LR_IN (bb),
				     REGNO (crtl->drap_reg)))
		{
		  crtl->drap_reg = NULL_RTX;
		  crtl->need_drap = false;
		}
	    }
	  else
	    cfun->machine->no_drap_save_restore = true;

	  frame_pointer_needed = false;
	  stack_realign = false;
	  crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
	  crtl->stack_alignment_needed = incoming_stack_boundary;
	  crtl->stack_alignment_estimated = incoming_stack_boundary;
	  if (crtl->preferred_stack_boundary > incoming_stack_boundary)
	    crtl->preferred_stack_boundary = incoming_stack_boundary;
	  df_finish_pass (true);
	  df_scan_alloc (NULL);
	  df_scan_blocks ();
	  df_compute_regs_ever_live (true);
	  df_analyze ();
	  if (flag_var_tracking)
	    {
	      /* Since frame pointer is no longer available, replace it with
		 stack pointer - UNITS_PER_WORD in debug insns.  */
	      df_ref ref, next;
	      for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
		   ref; ref = next)
		{
		  next = DF_REF_NEXT_REG (ref);
		  if (!DF_REF_INSN_INFO (ref))
		    continue;

		  /* Make sure the next ref is for a different instruction,
		     so that we're not affected by the rescan.  */
		  rtx_insn *insn = DF_REF_INSN (ref);
		  while (next && DF_REF_INSN (next) == insn)
		    next = DF_REF_NEXT_REG (next);

		  if (DEBUG_INSN_P (insn))
		    {
		      bool changed = false;
		      for (; ref != next; ref = DF_REF_NEXT_REG (ref))
			{
			  rtx *loc = DF_REF_LOC (ref);
			  if (*loc == hard_frame_pointer_rtx)
			    {
			      *loc = plus_constant (Pmode,
						    stack_pointer_rtx,
						    -UNITS_PER_WORD);
			      changed = true;
			    }
			}
		      if (changed)
			df_insn_rescan (insn);
		    }
		}
	    }

	  recompute_frame_layout_p = true;
	}
    }
  else if (crtl->max_used_stack_slot_alignment >= 128
	   && cfun->machine->stack_frame_required)
    {
      /* We don't need to realign stack.  max_used_stack_alignment is
	 used to decide how the stack frame should be aligned.  This is
	 independent of any psABI and of 32-bit vs 64-bit.  */
      cfun->machine->max_used_stack_alignment
	= stack_alignment / BITS_PER_UNIT;
    }

  if (crtl->stack_realign_needed != stack_realign)
    recompute_frame_layout_p = true;
  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
  if (recompute_frame_layout_p)
    ix86_compute_frame_layout ();
}
/* Delete SET_GOT right after entry block if it is allocated to reg.  */

static void
ix86_elim_entry_set_got (rtx reg)
{
  basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
  rtx_insn *c_insn = BB_HEAD (bb);
  if (!NONDEBUG_INSN_P (c_insn))
    c_insn = next_nonnote_nondebug_insn (c_insn);
  if (c_insn && NONJUMP_INSN_P (c_insn))
    {
      rtx pat = PATTERN (c_insn);
      if (GET_CODE (pat) == PARALLEL)
	{
	  rtx set = XVECEXP (pat, 0, 0);
	  if (GET_CODE (set) == SET
	      && GET_CODE (SET_SRC (set)) == UNSPEC
	      && XINT (SET_SRC (set), 1) == UNSPEC_SET_GOT
	      && REGNO (SET_DEST (set)) == REGNO (reg))
	    delete_insn (c_insn);
	}
    }
}
static rtx
gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
{
  rtx addr, mem;

  if (offset)
    addr = plus_constant (Pmode, frame_reg, offset);
  mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
  return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
}

static rtx
gen_frame_load (rtx reg, rtx frame_reg, int offset)
{
  return gen_frame_set (reg, frame_reg, offset, false);
}

static rtx
gen_frame_store (rtx reg, rtx frame_reg, int offset)
{
  return gen_frame_set (reg, frame_reg, offset, true);
}
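/* For instance (register and offset are illustrative),
   gen_frame_store (gen_rtx_REG (DImode, BX_REG), rax, -24) yields

       (set (mem:DI (plus (reg rax) (const_int -24))) (reg:DI bx))

   which is the form the ms2sysv save stub PARALLEL below is built from.  */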
static void
ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
{
  struct machine_function *m = cfun->machine;
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
			  + m->call_ms2sysv_extra_regs;
  rtvec v = rtvec_alloc (ncregs + 1);
  unsigned int align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, addr;
  rtx rax = gen_rtx_REG (word_mode, AX_REG);
  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();

  /* AL should only be live with sysv_abi.  */
  gcc_assert (!ix86_eax_live_at_start_p ());
  gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);

  /* Setup RAX as the stub's base pointer.  We use stack_realign_offset
     whether we've actually realigned the stack or not.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  addr = choose_baseaddr (frame.stack_realign_offset
			  + xlogue.get_stub_ptr_offset (), &align, AX_REG);
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rax, addr));

  /* Get the stub symbol.  */
  sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
						  : XLOGUE_STUB_SAVE);
  RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);

  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
      rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
			     r.regno);
      RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
    }

  gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));

  insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
  RTX_FRAME_RELATED_P (insn) = true;
}
/* Generate and return an insn body to AND X with Y.  */

static rtx_insn *
gen_and2_insn (rtx x, rtx y)
{
  enum insn_code icode = optab_handler (and_optab, GET_MODE (x));

  gcc_assert (insn_operand_matches (icode, 0, x));
  gcc_assert (insn_operand_matches (icode, 1, x));
  gcc_assert (insn_operand_matches (icode, 2, y));

  return GEN_FCN (icode) (x, x, y);
}
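/* A minimal sketch of its intended use (the constant is illustrative):

       emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-16)));

   emits the single AND that realigns the stack pointer downwards to a
   16-byte boundary -- the "realignment AND" referred to in
   ix86_compute_frame_layout above.  */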
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn, t;
  HOST_WIDE_INT allocate;
  bool int_registers_saved;
  bool sse_registers_saved;
  bool save_stub_call_needed;
  rtx static_chain = NULL_RTX;

  ix86_last_zero_store_uid = 0;
  if (ix86_function_naked (current_function_decl))
    {
      if (flag_stack_usage_info)
	current_function_static_stack_size = 0;
      return;
    }

  ix86_finalize_stack_frame_flags ();

  /* DRAP should not coexist with stack_realign_fp */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

  memset (&m->fs, 0, sizeof (m->fs));

  /* Initialize CFA state for before the prologue.  */
  m->fs.cfa_reg = stack_pointer_rtx;
  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;

  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested in offsets within the local frame.  */
  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
  m->fs.sp_valid = true;
  m->fs.sp_realigned = false;

  const struct ix86_frame &frame = cfun->machine->frame;
  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    {
      /* We should have already generated an error for any use of
	 ms_hook on a nested function.  */
      gcc_checking_assert (!ix86_static_chain_on_stack);

      /* Check if profiling is active and we shall use profiling before
	 prologue variant.  If so sorry.  */
      if (crtl->profile && flag_fentry != 0)
	sorry ("%<ms_hook_prologue%> attribute is not compatible "
	       "with %<-mfentry%> for 32-bit");

      /* In ix86_asm_output_function_label we emitted:
	  8b ff     movl.s %edi,%edi
	  55        push   %ebp
	  8b ec     movl.s %esp,%ebp

	 This matches the hookable function prologue in Win32 API
	 functions in Microsoft Windows XP Service Pack 2 and newer.
	 Wine uses this to enable Windows apps to hook the Win32 API
	 functions provided by Wine.

	 What that means is that we've already set up the frame pointer.  */

      if (frame_pointer_needed
	  && !(crtl->drap_reg && crtl->stack_realign_needed))
	{
	  rtx push, mov;

	  /* We've decided to use the frame pointer already set up.
	     Describe this to the unwinder by pretending that both
	     push and mov insns happen right here.

	     Putting the unwind info here at the end of the ms_hook
	     is done so that we can make absolutely certain we get
	     the required byte sequence at the start of the function,
	     rather than relying on an assembler that can produce
	     the exact encoding required.

	     However it does mean (in the unpatched case) that we have
	     a 1 insn window where the asynchronous unwind info is
	     incorrect.  However, if we placed the unwind info at
	     its correct location we would have incorrect unwind info
	     in the patched case.  Which is probably all moot since
	     I don't expect Wine generates dwarf2 unwind info for the
	     system libraries that use this feature.  */

	  insn = emit_insn (gen_blockage ());

	  push = gen_push (hard_frame_pointer_rtx);
	  mov = gen_rtx_SET (hard_frame_pointer_rtx,
			     stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (push) = 1;
	  RTX_FRAME_RELATED_P (mov) = 1;

	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));

	  /* Note that gen_push incremented m->fs.cfa_offset, even
	     though we didn't emit the push insn here.  */
	  m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.cfa_offset;
	  m->fs.fp_valid = true;
	}
      else
	{
	  /* The frame pointer is not needed so pop %ebp again.
	     This leaves us with a pristine state.  */
	  emit_insn (gen_pop (hard_frame_pointer_rtx));
	}
    }
  /* The first insn of a function that accepts its static chain on the
     stack is to push the register that would be filled in by a direct
     call.  This insn will be skipped by the trampoline.  */
  else if (ix86_static_chain_on_stack)
    {
      static_chain = ix86_static_chain (cfun->decl, false);
      insn = emit_insn (gen_push (static_chain));
      emit_insn (gen_blockage ());

      /* We don't want to interpret this push insn as a register save,
	 only as a stack adjustment.  The real copy of the register as
	 a save will be done later, if needed.  */
      t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
      t = gen_rtx_SET (stack_pointer_rtx, t);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  /* Emit prologue code to adjust stack alignment and set up the DRAP, in
     case the DRAP is needed and stack realignment is really needed after
     reload.  */
  if (stack_realign_drap)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;

      /* Can't use DRAP in an interrupt function.  */
      if (cfun->machine->func_type != TYPE_NORMAL)
	sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
	       "in interrupt service routine.  This may be worked "
	       "around by avoiding functions with aggregate return.");

      /* Only need to push the parameter pointer reg if it is caller saved.  */
      if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
	{
	  /* Push the arg pointer reg.  */
	  insn = emit_insn (gen_push (crtl->drap_reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Grab the argument pointer.  */
      t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
      insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
      RTX_FRAME_RELATED_P (insn) = 1;
      m->fs.cfa_reg = crtl->drap_reg;
      m->fs.cfa_offset = 0;

      /* Align the stack.  */
      insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
				       GEN_INT (-align_bytes)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Replicate the return address on the stack so that the return
	 address can be reached via (argp - 1) slot.  This is needed
	 to implement macro RETURN_ADDR_RTX and intrinsic function
	 expand_builtin_return_addr etc.  */
      t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
      t = gen_frame_mem (word_mode, t);
      insn = emit_insn (gen_push (t));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* For the purposes of frame and register save area addressing,
	 we've started over with a new frame.  */
      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
      m->fs.realigned = true;

      if (static_chain)
	{
	  /* Replicate the static chain on the stack so that the static
	     chain can be reached via (argp - 2) slot.  This is needed for
	     nested functions with stack realignment.  */
	  insn = emit_insn (gen_push (static_chain));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
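  /* Illustrative 32-bit DRAP sequence corresponding to the block above,
     assuming a 16-byte alignment requirement and %ecx as the DRAP register
     (the actual register is crtl->drap_reg):

	leal	4(%esp), %ecx		grab the argument pointer
	andl	$-16, %esp		align the stack
	pushl	-4(%ecx)		replicate the return address  */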
  int_registers_saved = (frame.nregs == 0);
  sse_registers_saved = (frame.nsseregs == 0);
  save_stub_call_needed = (m->call_ms2sysv);
  gcc_assert (sse_registers_saved || !save_stub_call_needed);

  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
	 slower on all targets.  Also sdb didn't like it.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
	{
	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  if (m->fs.cfa_reg == stack_pointer_rtx)
	    m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.sp_offset;
	  m->fs.fp_valid = true;
	}
    }

  if (!int_registers_saved)
    {
      /* If saving registers via PUSH, do so now.  */
      if (!frame.save_regs_using_mov)
	{
	  ix86_emit_save_regs ();
	  int_registers_saved = true;
	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
	}

      /* When using red zone we may start register saving before allocating
	 the stack frame saving one cycle of the prologue.  However, avoid
	 doing this if we have to probe the stack; at least on x86_64 the
	 stack probe can turn into a call that clobbers a red zone location.  */
      else if (ix86_using_red_zone ()
	       && (! TARGET_STACK_PROBE
		   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
	{
	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
	  cfun->machine->red_zone_used = true;
	  int_registers_saved = true;
	}
    }

  if (frame.red_zone_size != 0)
    cfun->machine->red_zone_used = true;
  if (stack_realign_fp)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);

      /* Record the last valid frame pointer offset.  */
      m->fs.sp_realigned_fp_last = frame.reg_save_offset;

      /* The computation of the size of the re-aligned stack frame means
	 that we must allocate the size of the register save area before
	 performing the actual alignment.  Otherwise we cannot guarantee
	 that there's enough storage above the realignment point.  */
      allocate = frame.reg_save_offset - m->fs.sp_offset
		 + frame.stack_realign_allocate;

      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-allocate), -1, false);

      /* Align the stack.  */
      emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
      m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
      m->fs.sp_realigned_offset = m->fs.sp_offset
				  - frame.stack_realign_allocate;
      /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
	 Beyond this point, stack access should be done via choose_baseaddr or
	 by using sp_valid_at and fp_valid_at to determine the correct base
	 register.  Henceforth, any CFA offset should be thought of as logical
	 and not physical.  */
      gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
      gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
      m->fs.sp_realigned = true;

      /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
	 is needed to describe where a register is saved using a realigned
	 stack pointer, so we need to invalidate the stack pointer for that
	 target.  */
      if (TARGET_SEH)
	m->fs.sp_valid = false;

      /* If the SP offset is non-immediate after allocation of the stack
	 frame, then emit SSE saves or the stub call prior to allocating
	 the rest of the stack frame.  This is less efficient for the
	 out-of-line stub because we can't combine allocations across the
	 call barrier, but it's better than using a scratch register.  */
      else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
						   - m->fs.sp_realigned_offset),
					  Pmode))
	{
	  if (!sse_registers_saved)
	    {
	      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
	      sse_registers_saved = true;
	    }
	  else if (save_stub_call_needed)
	    {
	      ix86_emit_outlined_ms2sysv_save (frame);
	      save_stub_call_needed = false;
	    }
	}
    }
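  /* Worked example for the realignment above: with align_bytes == 32 and
     m->fs.sp_offset == 24, the AND clears the low five bits of the physical
     stack pointer while ROUND_UP raises the logical offset to 32; the two
     may now disagree by up to 31 bytes, which is why later frame accesses
     must go through choose_baseaddr or consult sp_valid_at / fp_valid_at
     rather than assuming SP == CFA - sp_offset.  */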
  allocate = frame.stack_pointer_offset - m->fs.sp_offset;

  if (flag_stack_usage_info)
    {
      /* We start to count from ARG_POINTER.  */
      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;

      /* If it was realigned, take into account the fake frame.  */
      if (stack_realign_drap)
	{
	  if (ix86_static_chain_on_stack)
	    stack_size += UNITS_PER_WORD;

	  if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
	    stack_size += UNITS_PER_WORD;

	  /* This over-estimates by 1 minimal-stack-alignment-unit but
	     mitigates that by counting in the new return address slot.  */
	  current_function_dynamic_stack_size
	    += crtl->stack_alignment_needed / BITS_PER_UNIT;
	}

      current_function_static_stack_size = stack_size;
    }

  /* On SEH targets with a very large frame size, allocate an area to save
     SSE registers (as the very large allocation won't be described).  */
  if (TARGET_SEH
      && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
      && !sse_registers_saved)
    {
      HOST_WIDE_INT sse_size
	= frame.sse_reg_save_offset - frame.reg_save_offset;

      gcc_assert (int_registers_saved);

      /* No need to do stack checking as the area will be immediately
	 written.  */
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-sse_size), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
      allocate -= sse_size;
      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
      sse_registers_saved = true;
    }
  /* If stack clash protection is requested, then probe the stack, unless it
     is already probed on the target.  */
  if (allocate >= 0
      && flag_stack_clash_protection
      && !ix86_target_stack_probe ())
    {
      ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
      allocate = 0;
    }

  /* The stack has already been decremented by the instruction calling us
     so probe if the size is non-negative to preserve the protection area.  */
  else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      const HOST_WIDE_INT probe_interval = get_probe_interval ();

      if (STACK_CHECK_MOVING_SP)
	{
	  if (crtl->is_leaf
	      && !cfun->calls_alloca
	      && allocate <= probe_interval)
	    ;
	  else
	    {
	      ix86_adjust_stack_and_probe (allocate, int_registers_saved, true);
	      allocate = 0;
	    }
	}
      else
	{
	  HOST_WIDE_INT size = allocate;

	  if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
	    size = 0x80000000 - get_stack_check_protect () - 1;

	  if (TARGET_STACK_PROBE)
	    {
	      if (crtl->is_leaf && !cfun->calls_alloca)
		{
		  if (size > probe_interval)
		    ix86_emit_probe_stack_range (0, size,
						 int_registers_saved);
		}
	      else
		ix86_emit_probe_stack_range (0,
					     size + get_stack_check_protect (),
					     int_registers_saved);
	    }
	  else
	    {
	      if (crtl->is_leaf && !cfun->calls_alloca)
		{
		  if (size > probe_interval
		      && size > get_stack_check_protect ())
		    ix86_emit_probe_stack_range (get_stack_check_protect (),
						 (size
						  - get_stack_check_protect ()),
						 int_registers_saved);
		}
	      else
		ix86_emit_probe_stack_range (get_stack_check_protect (), size,
					     int_registers_saved);
	    }
	}
    }
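  /* Roughly speaking, each ix86_emit_probe_stack_range (FIRST, SIZE, ...)
     call above touches one word per probe interval within the SIZE bytes
     that start FIRST bytes below the incoming stack pointer, so no guard
     page inside the new frame can be skipped; the moving-SP variant
     (ix86_adjust_stack_and_probe) instead probes as it decrements SP.  See
     those emitters earlier in this file for the exact sequences.  */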
  if (allocate == 0)
    ;
  else if (!ix86_target_stack_probe ()
	   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-allocate), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
    }
  else
    {
      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      rtx r10 = NULL;
      const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
      bool eax_live = ix86_eax_live_at_start_p ();
      bool r10_live = false;

      if (TARGET_64BIT)
	r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);

      if (eax_live)
	{
	  insn = emit_insn (gen_push (eax));
	  allocate -= UNITS_PER_WORD;
	  /* Note that SEH directives need to continue tracking the stack
	     pointer even after the frame pointer has been set up.  */
	  if (sp_is_cfa_reg || TARGET_SEH)
	    {
	      if (sp_is_cfa_reg)
		m->fs.cfa_offset += UNITS_PER_WORD;
	      RTX_FRAME_RELATED_P (insn) = 1;
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			    gen_rtx_SET (stack_pointer_rtx,
					 plus_constant (Pmode,
							stack_pointer_rtx,
							-UNITS_PER_WORD)));
	    }
	}

      if (r10_live)
	{
	  r10 = gen_rtx_REG (Pmode, R10_REG);
	  insn = emit_insn (gen_push (r10));
	  allocate -= UNITS_PER_WORD;
	  if (sp_is_cfa_reg || TARGET_SEH)
	    {
	      if (sp_is_cfa_reg)
		m->fs.cfa_offset += UNITS_PER_WORD;
	      RTX_FRAME_RELATED_P (insn) = 1;
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			    gen_rtx_SET (stack_pointer_rtx,
					 plus_constant (Pmode,
							stack_pointer_rtx,
							-UNITS_PER_WORD)));
	    }
	}

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
			(Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));

      if (sp_is_cfa_reg || TARGET_SEH)
	{
	  if (sp_is_cfa_reg)
	    m->fs.cfa_offset += allocate;
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_SET (stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    -allocate)));
	}
      m->fs.sp_offset += allocate;

      /* Use stack_pointer_rtx for relative addressing so that code works for
	 realigned stack.  But this means that we need a blockage to prevent
	 stores based on the frame pointer from being scheduled before.  */
      if (r10_live && eax_live)
	{
	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
	  emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
			  gen_frame_mem (word_mode, t));
	  t = plus_constant (Pmode, t, UNITS_PER_WORD);
	  emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
			  gen_frame_mem (word_mode, t));
	  emit_insn (gen_memory_blockage ());
	}
      else if (eax_live || r10_live)
	{
	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
	  emit_move_insn (gen_rtx_REG (word_mode,
				       (eax_live ? AX_REG : R10_REG)),
			  gen_frame_mem (word_mode, t));
	  emit_insn (gen_memory_blockage ());
	}
    }
  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
  /* If we haven't already set up the frame pointer, do so now.  */
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
			    GEN_INT (frame.stack_pointer_offset
				     - frame.hard_frame_pointer_offset));
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);

      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_reg = hard_frame_pointer_rtx;
      m->fs.fp_offset = frame.hard_frame_pointer_offset;
      m->fs.fp_valid = true;
    }

  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  if (!sse_registers_saved)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
  else if (save_stub_call_needed)
    ix86_emit_outlined_ms2sysv_save (frame);
  /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
     in the prologue.  */
  if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
    {
      rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
      insn = emit_insn (gen_set_got (pic));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
      emit_insn (gen_prologue_use (pic));
      /* Delete an already emitted SET_GOT if it exists and is allocated to
	 REAL_PIC_OFFSET_TABLE_REGNUM.  */
      ix86_elim_entry_set_got (pic);
    }

  if (crtl->drap_reg && !crtl->stack_realign_needed)
    {
      /* vDRAP is set up, but after reload it turns out stack realignment
	 isn't necessary; here we will emit the prologue to set up DRAP
	 without the stack realignment adjustment.  */
      t = choose_baseaddr (0, NULL);
      emit_insn (gen_rtx_SET (crtl->drap_reg, t));
    }

  /* Prevent instructions from being scheduled into the register save push
     sequence when access to the redzone area is done through the frame
     pointer.  The offset between the frame pointer and the stack pointer is
     calculated relative to the value of the stack pointer at the end of the
     function prologue, and moving instructions that access the redzone area
     via the frame pointer inside the push sequence violates this
     assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.
     Further, prevent alloca modifications to the stack pointer from being
     combined with prologue modifications.  */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
}
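/* For reference, in the common case the code above boils down to the
   familiar frame-pointer prologue, e.g. on x86-64:

	pushq	%rbp
	movq	%rsp, %rbp
	subq	$N, %rsp

   with the integer register saves emitted either as pushes before the
   allocation or as moves into the frame afterwards, depending on
   frame.save_regs_using_mov.  */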
/* Emit code to restore REG using a POP or POPP insn.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p)
{
  struct machine_function *m = cfun->machine;
  rtx_insn *insn = emit_insn (gen_pop (reg, ppx_p));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
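/* Note on PPX_P: it selects the APX PUSHP/POPP encodings, which merely hint
   to the hardware that the pop is paired with an earlier push; the CFA and
   frame-state bookkeeping above is identical either way.  */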
/* Emit code to restore REG using a POP2 insn.  */

static void
ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false)
{
  struct machine_function *m = cfun->machine;
  const int offset = UNITS_PER_WORD * 2;
  rtx_insn *insn;

  rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));

  if (ppx_p)
    insn = emit_insn (gen_pop2p_di (reg1, mem, reg2));
  else
    insn = emit_insn (gen_pop2_di (reg1, mem, reg2));

  RTX_FRAME_RELATED_P (insn) = 1;

  rtx dwarf = NULL_RTX;
  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg1, dwarf);
  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg2, dwarf);
  REG_NOTES (insn) = dwarf;
  m->fs.sp_offset -= offset;

  if (m->fs.cfa_reg == crtl->drap_reg
      && (REGNO (reg1) == REGNO (crtl->drap_reg)
	  || REGNO (reg2) == REGNO (crtl->drap_reg)))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA,
		    REGNO (reg1) == REGNO (crtl->drap_reg) ? reg1 : reg2);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= offset;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg1 == hard_frame_pointer_rtx || reg2 == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= offset;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
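/* POP2 restores two registers with a single 16-byte stack access, which is
   why the memory operand above is TImode with a POST_INC address and why
   the caller below only uses it while the stack pointer is 16-byte
   aligned.  */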
/* Emit code to restore saved registers using POP insns.  */

static void
ix86_emit_restore_regs_using_pop (bool ppx_p)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p);
}
/* Emit code to restore saved registers using POP2 insns.  */

static void
ix86_emit_restore_regs_using_pop2 (void)
{
  int regno;
  int regno_list[2];
  regno_list[0] = regno_list[1] = -1;
  int loaded_regnum = 0;
  bool aligned = cfun->machine->fs.sp_offset % 16 == 0;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
      {
	if (aligned)
	  {
	    regno_list[loaded_regnum++] = regno;
	    if (loaded_regnum == 2)
	      {
		gcc_assert (regno_list[0] != -1
			    && regno_list[1] != -1
			    && regno_list[0] != regno_list[1]);

		ix86_emit_restore_reg_using_pop2 (gen_rtx_REG (word_mode,
							       regno_list[0]),
						  gen_rtx_REG (word_mode,
							       regno_list[1]),
						  TARGET_APX_PPX);
		loaded_regnum = 0;
		regno_list[0] = regno_list[1] = -1;
	      }
	  }
	else
	  {
	    ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno),
					     TARGET_APX_PPX);
	    aligned = cfun->machine->fs.sp_offset % 16 == 0;
	  }
      }

  if (loaded_regnum == 1)
    ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0]),
				     TARGET_APX_PPX);
}
/* Emit code and notes for the LEAVE instruction.  If insn is non-null,
   it omits the emit and only attaches the notes.  */

static void
ix86_emit_leave (rtx_insn *insn)
{
  struct machine_function *m = cfun->machine;

  if (!insn)
    insn = emit_insn (gen_leave (word_mode));

  ix86_add_queued_cfa_restore_notes (insn);

  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_realigned = false;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (Pmode, stack_pointer_rtx,
				   m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
			     m->fs.fp_offset);
}
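/* LEAVE is architecturally equivalent to "mov %rbp, %rsp; pop %rbp", which
   is exactly the state transition recorded above: SP becomes valid again,
   ending up just above the popped save slot (sp_offset = fp_offset -
   UNITS_PER_WORD), while FP ceases to be valid.  */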
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
				  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
      {
	rtx reg = gen_rtx_REG (word_mode, regno);
	rtx mem;
	rtx_insn *insn;

	mem = choose_baseaddr (cfa_offset, NULL);
	mem = gen_frame_mem (word_mode, mem);
	insn = emit_move_insn (reg, mem);

	if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
	  {
	    /* Previously we'd represented the CFA as an expression
	       like *(%ebp - 8).  We've just popped that value from
	       the stack, which means we need to reset the CFA to
	       the drap register.  This will remain until we restore
	       the stack pointer.  */
	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
	    RTX_FRAME_RELATED_P (insn) = 1;

	    /* This means that the DRAP register is valid for addressing.  */
	    m->fs.drap_valid = true;
	  }
	else
	  ix86_add_cfa_restore_note (NULL, reg, cfa_offset);

	cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
				      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
      {
	rtx reg = gen_rtx_REG (V4SFmode, regno);
	rtx mem;
	unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);

	mem = choose_baseaddr (cfa_offset, &align);
	mem = gen_rtx_MEM (V4SFmode, mem);

	/* The location alignment depends upon the base register.  */
	align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
	gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
	set_mem_align (mem, align);
	emit_insn (gen_rtx_SET (reg, mem));

	ix86_add_cfa_restore_note (NULL, reg, cfa_offset);

	cfa_offset -= GET_MODE_SIZE (V4SFmode);
      }
}
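/* Restore registers saved via the ms2sysv xlogue stub.  When USE_CALL is
   true the stub is called and control returns here; otherwise we jump to
   the stub and it performs the function's return itself (the *_TAIL stub
   variants).  */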
static void
ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
				    bool use_call, int style)
{
  struct machine_function *m = cfun->machine;
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
			  + m->call_ms2sysv_extra_regs;
  rtvec v;
  unsigned int elems_needed, align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, tmp;
  rtx rsi = gen_rtx_REG (word_mode, SI_REG);
  rtx r10 = NULL_RTX;
  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
  HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
  HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
  rtx rsi_frame_load = NULL_RTX;
  HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
  enum xlogue_stub stub;

  gcc_assert (!m->fs.fp_valid || frame_pointer_needed);

  /* If using a realigned stack, we should never start with padding.  */
  gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());

  /* Setup RSI as the stub's base pointer.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rsi, tmp));

  /* Get a symbol for the stub.  */
  if (frame_pointer_needed)
    stub = use_call ? XLOGUE_STUB_RESTORE_HFP
		    : XLOGUE_STUB_RESTORE_HFP_TAIL;
  else
    stub = use_call ? XLOGUE_STUB_RESTORE
		    : XLOGUE_STUB_RESTORE_TAIL;
  sym = xlogue.get_stub_rtx (stub);

  elems_needed = ncregs;
  if (use_call)
    elems_needed += 1;
  else
    elems_needed += frame_pointer_needed ? 5 : 3;
  v = rtvec_alloc (elems_needed);

  /* We call the epilogue stub when we need to pop incoming args or we are
     doing a sibling call as the tail.  Otherwise, we will emit a jmp to the
     epilogue stub and it is the tail-call.  */
  if (use_call)
    RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
  else
    {
      RTVEC_ELT (v, vi++) = ret_rtx;
      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
      if (frame_pointer_needed)
	{
	  rtx rbp = gen_rtx_REG (DImode, BP_REG);
	  gcc_assert (m->fs.fp_valid);
	  gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);

	  tmp = plus_constant (DImode, rbp, 8);
	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
	  RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
	  tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
	  RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
	}
      else
	{
	  /* If no hard frame pointer, we set R10 to the SP restore value.  */
	  gcc_assert (!m->fs.fp_valid);
	  gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	  gcc_assert (m->fs.sp_valid);

	  r10 = gen_rtx_REG (DImode, R10_REG);
	  tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
	  emit_insn (gen_rtx_SET (r10, tmp));

	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
	}
    }

  /* Generate frame load insns and restore notes.  */
  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
      machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
      rtx reg, frame_load;

      reg = gen_rtx_REG (mode, r.regno);
      frame_load = gen_frame_load (reg, rsi, r.offset);

      /* Save RSI frame load insn & note to add last.  */
      if (r.regno == SI_REG)
	{
	  gcc_assert (!rsi_frame_load);
	  rsi_frame_load = frame_load;
	  rsi_restore_offset = r.offset;
	}
      else
	{
	  RTVEC_ELT (v, vi++) = frame_load;
	  ix86_add_cfa_restore_note (NULL, reg, r.offset);
	}
    }

  /* Add the RSI frame load & restore note at the end.  */
  gcc_assert (rsi_frame_load);
  gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
  RTVEC_ELT (v, vi++) = rsi_frame_load;
  ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
			     rsi_restore_offset);

  /* Finally, for a tail-call w/o a hard frame pointer, set SP to R10.  */
  if (!use_call && !frame_pointer_needed)
    {
      gcc_assert (m->fs.sp_valid);
      gcc_assert (!m->fs.sp_realigned);

      /* At this point, R10 should point to frame.stack_realign_offset.  */
      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
      m->fs.sp_offset = frame.stack_realign_offset;
    }

  gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
  tmp = gen_rtx_PARALLEL (VOIDmode, v);
  if (use_call)
    insn = emit_insn (tmp);
  else
    {
      insn = emit_jump_insn (tmp);
      JUMP_LABEL (insn) = ret_rtx;

      if (frame_pointer_needed)
	ix86_emit_leave (insn);
      else
	{
	  /* Need a CFA adjust note.  */
	  tmp = gen_rtx_SET (stack_pointer_rtx, r10);
	  add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
	}
    }

  RTX_FRAME_RELATED_P (insn) = true;
  ix86_add_queued_cfa_restore_notes (insn);

  /* If we're not doing a tail-call, we need to adjust the stack.  */
  if (use_call && m->fs.sp_valid)
    {
      HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (dealloc), style,
				 m->fs.cfa_reg == stack_pointer_rtx);
    }
}
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  struct machine_function *m = cfun->machine;
  struct machine_frame_state frame_state_save = m->fs;
  bool restore_regs_via_mov;
  bool using_drap;
  bool restore_stub_is_tail = false;

  if (ix86_function_naked (current_function_decl))
    {
      /* The program should not reach this point.  */
      emit_insn (gen_ud2 ());
      return;
    }

  ix86_finalize_stack_frame_flags ();
  const struct ix86_frame &frame = cfun->machine->frame;

  m->fs.sp_realigned = stack_realign_fp;
  m->fs.sp_valid = stack_realign_fp
		   || !frame_pointer_needed
		   || crtl->sp_is_unchanging;
  gcc_assert (!m->fs.sp_valid
	      || m->fs.sp_offset == frame.stack_pointer_offset);

  /* The FP must be valid if the frame pointer is present.  */
  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
  gcc_assert (!m->fs.fp_valid
	      || m->fs.fp_offset == frame.hard_frame_pointer_offset);

  /* We must have *some* valid pointer to the stack frame.  */
  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);

  /* The DRAP is never valid at this point.  */
  gcc_assert (!m->fs.drap_valid);

  /* See the comment about red zone and frame
     pointer usage in ix86_expand_prologue.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);

  /* Determine the CFA offset of the end of the red-zone.  */
  m->fs.red_zone_offset = 0;
  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
    {
      /* The red-zone begins below the return address and error code in
	 the exception handler.  */
      m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;

      /* When the register save area is in the aligned portion of
	 the stack, determine the maximum runtime displacement that
	 matches up with the aligned frame.  */
      if (stack_realign_drap)
	m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
				  + UNITS_PER_WORD);
    }

  HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;

  /* Special care must be taken for the normal return case of a function
     using eh_return: the eax and edx registers are marked as saved, but
     not restored along this path.  Adjust the save location to match.  */
  if (crtl->calls_eh_return && style != 2)
    reg_save_offset -= 2 * UNITS_PER_WORD;

  /* EH_RETURN requires the use of moves to function properly.  */
  if (crtl->calls_eh_return)
    restore_regs_via_mov = true;
  /* SEH requires the use of pops to identify the epilogue.  */
  else if (TARGET_SEH)
    restore_regs_via_mov = false;
  /* If we're only restoring one register and sp cannot be used then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.  */
  else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
    restore_regs_via_mov = true;
  else if (TARGET_EPILOGUE_USING_MOVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && (frame.nregs > 1
	       || m->fs.sp_offset != reg_save_offset))
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && !frame.nregs
	   && m->fs.sp_offset != reg_save_offset)
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && TARGET_USE_LEAVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && frame.nregs == 1)
    restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;
  if (restore_regs_via_mov || frame.nsseregs)
    {
      /* Ensure that the entire register save area is addressable via
	 the stack pointer, if we will restore SSE regs via sp.  */
      if (TARGET_64BIT
	  && m->fs.sp_offset > 0x7fffffff
	  && sp_valid_at (frame.stack_realign_offset + 1)
	  && (frame.nsseregs + frame.nregs) != 0)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (m->fs.sp_offset
					    - frame.sse_reg_save_offset),
				   style,
				   m->fs.cfa_reg == stack_pointer_rtx);
    }

  /* If there are any SSE registers to restore, then we have to do it
     via moves, since there's obviously no pop for SSE regs.  */
  if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
					  style == 2);

  if (m->call_ms2sysv)
    {
      int pop_incoming_args = crtl->args.pops_args && crtl->args.size;

      /* We cannot use a tail-call for the stub if:
	 1. We have to pop incoming args,
	 2. We have additional int regs to restore, or
	 3. A sibling call will be the tail-call, or
	 4. We are emitting an eh_return_internal epilogue.

	 TODO: Item 4 has not yet been tested!

	 If any of the above are true, we will call the stub rather than
	 jump to it.  */
      restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
      ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
    }

  /* If using an out-of-line stub that is a tail-call, then...  */
  if (m->call_ms2sysv && restore_stub_is_tail)
    {
      /* TODO: paranoid tests.  (remove eventually)  */
      gcc_assert (m->fs.sp_valid);
      gcc_assert (!m->fs.sp_realigned);
      gcc_assert (!m->fs.fp_valid);
      gcc_assert (!m->fs.realigned);
      gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
      gcc_assert (!crtl->drap_reg);
      gcc_assert (!frame.nregs);
    }
  else if (restore_regs_via_mov)
    {
      rtx t;

      if (frame.nregs)
	ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx sa = EH_RETURN_STACKADJ_RTX;
	  rtx_insn *insn;

	  /* Stack realignment doesn't work with eh_return.  */
	  if (crtl->stack_realign_needed)
	    sorry ("Stack realignment not supported with "
		   "%<__builtin_eh_return%>");

	  /* regparm nested functions don't work with eh_return.  */
	  if (ix86_static_chain_on_stack)
	    sorry ("regparm nested function not supported with "
		   "%<__builtin_eh_return%>");

	  if (frame_pointer_needed)
	    {
	      t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (sa, t));

	      /* NB: eh_return epilogues must restore the frame pointer
		 in word_mode since the upper 32 bits of RBP register
		 can have any values.  */
	      t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
	      rtx frame_reg = gen_rtx_REG (word_mode,
					   HARD_FRAME_POINTER_REGNUM);
	      insn = emit_move_insn (frame_reg, t);

	      /* Note that we use SA as a temporary CFA, as the return
		 address is at the proper place relative to it.  We
		 pretend this happens at the FP restore insn because
		 prior to this insn the FP would be stored at the wrong
		 offset relative to SA, and after this insn we have no
		 other reasonable register to use for the CFA.  We don't
		 bother resetting the CFA to the SP for the duration of
		 the return insn, unless the control flow instrumentation
		 is done.  In this case the SP is used later and we have
		 to reset CFA to SP.  */
	      add_reg_note (insn, REG_CFA_DEF_CFA,
			    plus_constant (Pmode, sa, UNITS_PER_WORD));
	      ix86_add_queued_cfa_restore_notes (insn);
	      add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
	      RTX_FRAME_RELATED_P (insn) = 1;

	      m->fs.cfa_reg = sa;
	      m->fs.cfa_offset = UNITS_PER_WORD;
	      m->fs.fp_valid = false;

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style,
					 flag_cf_protection);
	    }
	  else
	    {
	      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
	      insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
	      ix86_add_queued_cfa_restore_notes (insn);

	      gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	      if (m->fs.cfa_offset != UNITS_PER_WORD)
		{
		  m->fs.cfa_offset = UNITS_PER_WORD;
		  add_reg_note (insn, REG_CFA_DEF_CFA,
				plus_constant (Pmode, stack_pointer_rtx,
					       UNITS_PER_WORD));
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
	    }
	  m->fs.sp_offset = UNITS_PER_WORD;
	  m->fs.sp_valid = true;
	  m->fs.sp_realigned = false;
	}
    }
  else
    {
      /* SEH requires that the function end with (1) a stack adjustment
	 if necessary, (2) a sequence of pops, and (3) a return or
	 jump instruction.  Prevent insns from the function body from
	 being scheduled into this sequence.  */
      if (TARGET_SEH)
	{
	  /* Prevent a catch region from being adjacent to the standard
	     epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
	     nor several other flags that would be interesting to test are
	     set up yet.  */
	  if (flag_non_call_exceptions)
	    emit_insn (gen_nops (const1_rtx));
	  else
	    emit_insn (gen_blockage ());
	}

      /* The first step is to deallocate the stack frame so that we can
	 pop the registers.  If the stack pointer was realigned, it needs
	 to be restored now.  Also do it on SEH targets for very large
	 frames, as the emitted instructions aren't allowed by the ABI
	 in epilogues.  */
      if (!m->fs.sp_valid || m->fs.sp_realigned
	  || (TARGET_SEH
	      && (m->fs.sp_offset - reg_save_offset
		  >= SEH_MAX_FRAME_SIZE)))
	pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
				   GEN_INT (m->fs.fp_offset
					    - reg_save_offset),
				   style, false);
      else if (m->fs.sp_offset != reg_save_offset)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (m->fs.sp_offset
					    - reg_save_offset),
				   style,
				   m->fs.cfa_reg == stack_pointer_rtx);

      if (TARGET_APX_PUSH2POP2
	  && ix86_can_use_push2pop2 ()
	  && m->func_type == TYPE_NORMAL)
	ix86_emit_restore_regs_using_pop2 ();
      else
	ix86_emit_restore_regs_using_pop (TARGET_APX_PPX);
    }
  /* If we used a frame pointer and haven't already got rid of it,
     then do so now.  */
  if (m->fs.fp_valid)
    {
      /* If the stack pointer is valid and pointing at the frame
	 pointer store address, then we only need a pop.  */
      if (sp_valid_at (frame.hfp_save_offset)
	  && m->fs.sp_offset == frame.hfp_save_offset)
	ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
      /* Leave results in shorter dependency chains on CPUs that are
	 able to grok it fast.  */
      else if (TARGET_USE_LEAVE
	       || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
	       || !cfun->machine->use_fast_prologue_epilogue)
	ix86_emit_leave (NULL);
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style, !using_drap);
	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
	}
    }
  if (using_drap)
    {
      int param_ptr_offset = UNITS_PER_WORD;
      rtx_insn *insn;

      gcc_assert (stack_realign_drap);

      if (ix86_static_chain_on_stack)
	param_ptr_offset += UNITS_PER_WORD;
      if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
	param_ptr_offset += UNITS_PER_WORD;

      insn = emit_insn (gen_rtx_SET
			(stack_pointer_rtx,
			 plus_constant (Pmode, crtl->drap_reg,
					-param_ptr_offset)));
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = param_ptr_offset;
      m->fs.sp_offset = param_ptr_offset;
      m->fs.realigned = false;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (Pmode, stack_pointer_rtx,
				   param_ptr_offset));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
    }
  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.  */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.sp_realigned);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
			       style, true);
  else
    ix86_add_queued_cfa_restore_notes (get_last_insn ());

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }

  if (cfun->machine->func_type != TYPE_NORMAL)
    emit_jump_insn (gen_interrupt_return ());
  else if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
	 return address, do an explicit add, and jump indirectly to the
	 caller.  */
      if (crtl->args.pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
	  rtx_insn *insn;

	  /* There is no "pascal" calling convention in any 64bit ABI.  */
	  gcc_assert (!TARGET_64BIT);

	  insn = emit_insn (gen_pop (ecx));
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  m->fs.sp_offset -= UNITS_PER_WORD;

	  rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     popc, -1, true);
	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_simple_return_pop_internal (popc));
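      /* The 64K limit above comes from RET's 16-bit immediate form
	 ("ret imm16"), e.g. "ret $12" for a stdcall function releasing 12
	 bytes of arguments; larger amounts need the manual
	 pop/add/indirect-jump sequence.  */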
    }
  else if (!m->call_ms2sysv || !restore_stub_is_tail)
    {
      /* In case of return from EH a simple return cannot be used
	 as a return address will be compared with a shadow stack
	 return address.  Use indirect jump instead.  */
      if (style == 2 && flag_cf_protection)
	{
	  /* Register used in indirect jump must be in word_mode.  But
	     Pmode may not be the same as word_mode for x32.  */
	  rtx ecx = gen_rtx_REG (word_mode, CX_REG);
	  rtx_insn *insn;

	  insn = emit_insn (gen_pop (ecx));
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  m->fs.sp_offset -= UNITS_PER_WORD;

	  rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_simple_return_internal ());
    }

  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx
      && !ix86_use_pseudo_pic_reg ())
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);

  if (TARGET_MACHO)
    {
      rtx_insn *insn = get_last_insn ();
      rtx_insn *deleted_debug_label = NULL;

      /* Mach-O doesn't support labels at the end of objects, so if
	 it looks like we might want one, take special action.
	 First, collect any sequence of deleted debug labels.  */
      while (insn
	     && NOTE_P (insn)
	     && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
	{
	  /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
	     notes only; instead set their CODE_LABEL_NUMBER to -1,
	     otherwise there would be code generation differences
	     between -g and -g0.  */
	  if (NOTE_P (insn) && NOTE_KIND (insn)
	      == NOTE_INSN_DELETED_DEBUG_LABEL)
	    deleted_debug_label = insn;
	  insn = PREV_INSN (insn);
	}

      /* If we have:
	 label:
	    barrier
	 then this needs to be detected, so skip past the barrier.  */

      if (insn && BARRIER_P (insn))
	insn = PREV_INSN (insn);

      /* Up to now we've only seen notes or barriers.  */
      if (insn)
	{
	  if (LABEL_P (insn)
	      || (NOTE_P (insn)
		  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
	    /* Trailing label.  */
	    fputs ("\tnop\n", file);
	  else if (cfun && ! cfun->is_thunk)
	    {
	      /* See if we have a completely empty function body, skipping
		 the special case of the picbase thunk emitted as asm.  */
	      while (insn && ! INSN_P (insn))
		insn = PREV_INSN (insn);
	      /* If we don't find any insns, we've got an empty function body;
		 i.e. completely empty - without a return or branch.  This is
		 taken as the case where a function body has been removed
		 because it contains an inline __builtin_unreachable().  GCC
		 declares that reaching __builtin_unreachable() means UB so
		 we're not obliged to do anything special; however, we want
		 non-zero-sized function bodies.  To meet this, and help the
		 user out, let's trap the case.  */
	      if (insn == NULL)
		fputs ("\tud2\n", file);
	    }
	}
      else if (deleted_debug_label)
	for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
	  if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	    CODE_LABEL_NUMBER (insn) = -1;
    }
}
/* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY.  */

static void
ix86_print_patchable_function_entry (FILE *file,
				     unsigned HOST_WIDE_INT patch_area_size,
				     bool record_p)
{
  if (cfun->machine->function_label_emitted)
    {
      /* NB: When ix86_print_patchable_function_entry is called after
	 the function label has been emitted, we have inserted or queued
	 a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
	 place.  There is nothing to do here.  */
      return;
    }

  default_print_patchable_function_entry (file, patch_area_size,
					  record_p);
}

/* Output the patchable area.  NB: default_print_patchable_function_entry
   isn't available in i386.md.  */

void
ix86_output_patchable_area (unsigned int patch_area_size,
			    bool record_p)
{
  default_print_patchable_function_entry (asm_out_file,
					  patch_area_size, record_p);
}
/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It is the first
   instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.  */

static unsigned int
split_stack_prologue_scratch_regno (void)
{
  if (TARGET_64BIT)
    return R11_REG;
  else
    {
      bool is_fastcall, is_thiscall;
      int regparm;

      is_fastcall = (lookup_attribute ("fastcall",
				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
		     != NULL);
      is_thiscall = (lookup_attribute ("thiscall",
				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
		     != NULL);
      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);

      if (is_fastcall)
	{
	  if (DECL_STATIC_CHAIN (cfun->decl))
	    {
	      sorry ("%<-fsplit-stack%> does not support fastcall with "
		     "nested function");
	      return INVALID_REGNUM;
	    }
	  return AX_REG;
	}
      else if (is_thiscall)
	{
	  if (!DECL_STATIC_CHAIN (cfun->decl))
	    return DX_REG;
	  return AX_REG;
	}
      else if (regparm < 3)
	{
	  if (!DECL_STATIC_CHAIN (cfun->decl))
	    return CX_REG;
	  else
	    {
	      if (regparm >= 2)
		{
		  sorry ("%<-fsplit-stack%> does not support 2 register "
			 "parameters for a nested function");
		  return INVALID_REGNUM;
		}
	      return DX_REG;
	    }
	}
      else
	{
	  /* FIXME: We could make this work by pushing a register
	     around the addition and comparison.  */
	  sorry ("%<-fsplit-stack%> does not support 3 register parameters");
	  return INVALID_REGNUM;
	}
    }
}
/* A SYMBOL_REF for the function which allocates new stack space for
   -fsplit-stack.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more-stack function when using the large
   model.  */

static GTY(()) rtx split_stack_fn_large;

/* Return the location of the stack guard value in the TLS block.  */

rtx
ix86_split_stack_guard (void)
{
  int offset;
  addr_space_t as = DEFAULT_TLS_SEG_REG;
  rtx r;

  gcc_assert (flag_split_stack);

#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
  offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
#else
  gcc_unreachable ();
#endif

  r = GEN_INT (offset);
  r = gen_const_mem (Pmode, r);
  set_mem_addr_space (r, as);

  return r;
}
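/* On GNU/Linux targets TARGET_THREAD_SPLIT_STACK_OFFSET is a small TLS
   offset (0x70 for 64-bit, 0x30 for 32-bit in the gnu-user headers), so the
   guard read built here ends up as e.g. "%fs:0x70" and the comparison in
   the split-stack prologue below becomes "cmpq %fs:0x70, %r11".  */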
/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  */

void
ix86_expand_split_stack_prologue (void)
{
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx_code_label *label;
  rtx limit, current, allocate_rtx, call_fusage;
  rtx_insn *call_insn;
  unsigned int scratch_regno = INVALID_REGNUM;
  rtx scratch_reg = NULL_RTX;
  rtx_code_label *varargs_label = NULL;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_frame_flags ();
  struct ix86_frame &frame = cfun->machine->frame;
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = ix86_split_stack_guard ();

  if (allocate >= SPLIT_STACK_AVAILABLE
      || flag_force_indirect_call)
    {
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
	return;
    }

  if (allocate >= SPLIT_STACK_AVAILABLE)
    {
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
	 the required frame size.  Since this is the very start of the
	 function, the scratch register can be any caller-saved
	 register which is not used for parameters.  */
      offset = GEN_INT (- allocate);

      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
	{
	  /* We don't use gen_add in this case because it will
	     want to split to lea, but when not optimizing the insn
	     will not be split after this point.  */
	  emit_insn (gen_rtx_SET (scratch_reg,
				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
						offset)));
	}
      else
	{
	  emit_move_insn (scratch_reg, offset);
	  emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
	}
      current = scratch_reg;
    }
  else
    current = stack_pointer_rtx;

  ix86_expand_branch (GEU, current, limit, label);
  rtx_insn *jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
10490 if (split_stack_fn
== NULL_RTX
)
10492 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
10493 SYMBOL_REF_FLAGS (split_stack_fn
) |= SYMBOL_FLAG_LOCAL
;
10495 fn
= split_stack_fn
;
10497 /* Get more stack space. We pass in the desired stack space and the
10498 size of the arguments to copy to the new stack. In 32-bit mode
10499 we push the parameters; __morestack will return on a new stack
10500 anyhow. In 64-bit mode we pass the parameters in r10 and
10502 allocate_rtx
= GEN_INT (allocate
);
10503 args_size
= crtl
->args
.size
>= 0 ? (HOST_WIDE_INT
) crtl
->args
.size
: 0;
10504 call_fusage
= NULL_RTX
;
10505 rtx pop
= NULL_RTX
;
10510 reg10
= gen_rtx_REG (DImode
, R10_REG
);
10511 reg11
= gen_rtx_REG (DImode
, R11_REG
);
10513 /* If this function uses a static chain, it will be in %r10.
10514 Preserve it across the call to __morestack. */
10515 if (DECL_STATIC_CHAIN (cfun
->decl
))
10519 rax
= gen_rtx_REG (word_mode
, AX_REG
);
10520 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
10521 use_reg (&call_fusage
, rax
);
10524 if (flag_force_indirect_call
10525 || ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
10527 HOST_WIDE_INT argval
;
10529 if (split_stack_fn_large
== NULL_RTX
)
10531 split_stack_fn_large
10532 = gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
10533 SYMBOL_REF_FLAGS (split_stack_fn_large
) |= SYMBOL_FLAG_LOCAL
;
10536 fn
= split_stack_fn_large
;
10538 if (ix86_cmodel
== CM_LARGE_PIC
)
10540 rtx_code_label
*label
;
10543 gcc_assert (Pmode
== DImode
);
10545 label
= gen_label_rtx ();
10546 emit_label (label
);
10547 LABEL_PRESERVE_P (label
) = 1;
10548 emit_insn (gen_set_rip_rex64 (reg10
, label
));
10549 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
10550 emit_insn (gen_add2_insn (reg10
, reg11
));
10551 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fn
), UNSPEC_GOT
);
10552 x
= gen_rtx_CONST (Pmode
, x
);
10553 emit_move_insn (reg11
, x
);
10554 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
10555 x
= gen_const_mem (Pmode
, x
);
10556 fn
= copy_to_suggested_reg (x
, reg11
, Pmode
);
10558 else if (ix86_cmodel
== CM_LARGE
)
10559 fn
= copy_to_suggested_reg (fn
, reg11
, Pmode
);
10561 /* When using the large model we need to load the address
10562 into a register, and we've run out of registers. So we
10563 switch to a different calling convention, and we call a
10564 different function: __morestack_large. We pass the
10565 argument size in the upper 32 bits of r10 and pass the
10566 frame size in the lower 32 bits. */
10567 gcc_assert ((allocate
& HOST_WIDE_INT_C (0xffffffff)) == allocate
);
10568 gcc_assert ((args_size
& 0xffffffff) == args_size
);
10570 argval
= ((args_size
<< 16) << 16) + allocate
;
10571 emit_move_insn (reg10
, GEN_INT (argval
));
	}
      else
	{
	  emit_move_insn (reg10, allocate_rtx);
	  emit_move_insn (reg11, GEN_INT (args_size));
	  use_reg (&call_fusage, reg11);
	}

      use_reg (&call_fusage, reg10);
    }
  else
    {
      if (flag_force_indirect_call && flag_pic)
	{
	  rtx x;

	  gcc_assert (Pmode == SImode);

	  scratch_reg = gen_rtx_REG (Pmode, scratch_regno);

	  emit_insn (gen_set_got (scratch_reg));
	  x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn),
			      UNSPEC_GOT);
	  x = gen_rtx_CONST (Pmode, x);
	  x = gen_rtx_PLUS (Pmode, scratch_reg, x);
	  x = gen_const_mem (Pmode, x);
	  fn = copy_to_suggested_reg (x, scratch_reg, Pmode);
	}

      rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
      insn = emit_insn (gen_push (allocate_rtx));
      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
      pop = GEN_INT (2 * UNITS_PER_WORD);
    }

  if (flag_force_indirect_call && !register_operand (fn, VOIDmode))
    {
      scratch_reg = gen_rtx_REG (word_mode, scratch_regno);

      if (GET_MODE (fn) != word_mode)
	fn = gen_rtx_ZERO_EXTEND (word_mode, fn);

      fn = copy_to_suggested_reg (fn, scratch_reg, word_mode);
    }

  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
				GEN_INT (UNITS_PER_WORD), constm1_rtx,
				pop, false);
  add_function_usage_to (call_insn, call_fusage);
  if (!TARGET_64BIT)
    add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
  /* Indicate that this function can't jump to non-local gotos.  */
  make_reg_eh_region_note_nothrow_nononlocal (call_insn);

  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  rtx_insn *ret_insn
    = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  if ((flag_cf_protection & CF_BRANCH))
    {
      /* Insert ENDBR since __morestack will jump back here via indirect
	 call.  */
      rtx cet_eb = gen_nop_endbr ();
      emit_insn_after (cet_eb, ret_insn);
    }

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling _morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
		    gen_rtx_REG (word_mode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
	 fp -> old fp
	       return address within this function
	       return address of caller of this function
	       stack arguments
	 So we add three words to get to the stack arguments.

	 32-bit:
	 fp -> old fp
	       return address within this function
	       first argument to __morestack
	       second argument to __morestack
	       return address of caller of this function
	       stack arguments
	 So we add five words to get to the stack arguments.  */
      words = TARGET_64BIT ? 3 : 5;
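      /* That is 3 * 8 == 24 bytes above the frame pointer on x86-64 and
	 5 * 4 == 20 bytes on ia32, UNITS_PER_WORD being 8 and 4
	 respectively.  */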
      emit_insn (gen_rtx_SET (scratch_reg,
			      plus_constant (Pmode, frame_reg,
					     words * UNITS_PER_WORD)));

      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;
      emit_barrier ();
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (scratch_reg,
			      plus_constant (Pmode, stack_pointer_rtx,
					     UNITS_PER_WORD)));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}

/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a scratch register.  */

static void
ix86_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
    }
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return false if the structure of the address is
   grossly off.  */

bool
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  addr_space_t seg = ADDR_SPACE_GENERIC;

  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
	  && GET_MODE (XEXP (addr, 0)) == SImode)
	{
	  addr = XEXP (addr, 0);
	  if (CONST_INT_P (addr))
	    return false;
	}
      else if (GET_CODE (addr) == AND
	       && const_32bit_mask (XEXP (addr, 1), DImode))
	{
	  addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
	  if (addr == NULL_RTX)
	    return false;

	  if (CONST_INT_P (addr))
	    return false;
	}
      else if (GET_CODE (addr) == AND)
	{
	  /* For ASHIFT inside AND, combine will not generate a
	     canonical zero-extend.  Merge the mask for the AND and the
	     shift count to check whether it is a canonical
	     zero-extend.  */
	  tmp = XEXP (addr, 0);
	  rtx mask = XEXP (addr, 1);
	  if (tmp && GET_CODE (tmp) == ASHIFT)
	    {
	      rtx shift_val = XEXP (tmp, 1);
	      if (CONST_INT_P (mask) && CONST_INT_P (shift_val)
		  && (((unsigned HOST_WIDE_INT) INTVAL (mask)
		       | ((HOST_WIDE_INT_1U << INTVAL (shift_val)) - 1))
		      == 0xffffffff))
		{
		  addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
		  if (addr == NULL_RTX)
		    return false;
		}
	    }
	}
    }

  /* Allow SImode subregs of DImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == SImode)
    {
      if (SUBREG_P (addr)
	  && GET_MODE (SUBREG_REG (addr)) == DImode)
	{
	  addr = SUBREG_REG (addr);
	  if (CONST_INT_P (addr))
	    return false;
	}
    }

  if (REG_P (addr))
    base = addr;
  else if (SUBREG_P (addr))
    {
      if (REG_P (SUBREG_REG (addr)))
	base = addr;
      else
	return false;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return false;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return false;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return false;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case ASHIFT:
	      if (index)
		return false;
	      index = XEXP (op, 0);
	      tmp = XEXP (op, 1);
	      if (!CONST_INT_P (tmp))
		return false;
	      scale = INTVAL (tmp);
	      if ((unsigned HOST_WIDE_INT) scale > 3)
		return false;
	      scale = 1 << scale;
	      break;

	    case ZERO_EXTEND:
	      op = XEXP (op, 0);
	      if (GET_CODE (op) != UNSPEC)
		return false;
	      /* FALLTHRU */

	    case UNSPEC:
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == ADDR_SPACE_GENERIC)
		seg = DEFAULT_TLS_SEG_REG;
	      else
		return false;
	      break;

	    case SUBREG:
	      if (!REG_P (SUBREG_REG (op)))
		return false;
	      /* FALLTHRU */

	    case REG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return false;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return false;
	      disp = op;
	      break;

	    default:
	      return false;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
	return false;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return false;
      scale = 1 << scale;
    }
  else
    disp = addr;			/* displacement */

  if (index)
    {
      if (REG_P (index))
	;
      else if (SUBREG_P (index)
	       && REG_P (SUBREG_REG (index)))
	;
      else
	return false;
    }

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
	return false;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
  index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;

  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base_reg && index_reg && scale == 1
      && (REGNO (index_reg) == ARG_POINTER_REGNUM
	  || REGNO (index_reg) == FRAME_POINTER_REGNUM
	  || REGNO (index_reg) == SP_REG))
    {
      std::swap (base, index);
      std::swap (base_reg, index_reg);
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp && base_reg
      && (REGNO (base_reg) == ARG_POINTER_REGNUM
	  || REGNO (base_reg) == FRAME_POINTER_REGNUM
	  || REGNO (base_reg) == BP_REG
	  || REGNO (base_reg) == R13_REG))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
  if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return true;
}
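/* As a concrete example of the decomposition above, the AT&T operand
   8(%rax,%rbx,4), i.e. the RTX

     (plus:DI (mult:DI (reg:DI bx) (const_int 4))
	      (plus:DI (reg:DI ax) (const_int 8)))

   yields base = %rax, index = %rbx, scale = 4 and disp = (const_int 8),
   with seg left as ADDR_SPACE_GENERIC.  */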
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */

static int
ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && SUBREG_P (parts.base))
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && SUBREG_P (parts.index))
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address by increasing
     address cost for each used register.  We don't increase address cost
     for "pic_offset_table_rtx".  When a memop with "pic_offset_table_rtx"
     is not invariant itself it most likely means that base or index is not
     invariant.  Therefore only "pic_offset_table_rtx" could be hoisted out,
     which is not profitable for x86.  */
  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && (current_pass->type == GIMPLE_PASS
	  || !pic_offset_table_rtx
	  || !REG_P (parts.base)
	  || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
    cost++;

  if (parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && (current_pass->type == GIMPLE_PASS
	  || !pic_offset_table_rtx
	  || !REG_P (parts.index)
	  || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero
     into the memory address, but I don't have an AMD-K6 machine handy to
     check this theory.  */

  if (TARGET_CPU_P (K6)
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
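/* With the counting above, a single-register address such as (%eax)
   keeps the base cost of 1, while a base-plus-scaled-index address
   built from two pseudo registers costs 3, which is what steers the
   RTL optimizers toward addresses using fewer registers.  */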
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */

static bool
darwin_local_data_pic (rtx disp)
{
  return (GET_CODE (disp) == UNSPEC
	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}

/* True if the function symbol operand X should be loaded from GOT.
   If CALL_P is true, X is a call operand.

   NB: -mno-direct-extern-access doesn't force load from GOT for
   call.

   NB: In 32-bit mode, only non-PIC is allowed in inline assembly
   statements, since a PIC register could not be available at the
   call site.  */

bool
ix86_force_load_from_GOT_p (rtx x, bool call_p)
{
  return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X))
	  && !TARGET_PECOFF && !TARGET_MACHO
	  && (!flag_pic || this_is_asm_operands)
	  && ix86_cmodel != CM_LARGE
	  && ix86_cmodel != CM_LARGE_PIC
	  && GET_CODE (x) == SYMBOL_REF
	  && ((!call_p
	       && (!ix86_direct_extern_access
		   || (SYMBOL_REF_DECL (x)
		       && lookup_attribute ("nodirect_extern_access",
					    DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))
	      || (SYMBOL_REF_FUNCTION_P (x)
		  && (!flag_plt
		      || (SYMBOL_REF_DECL (x)
			  && lookup_attribute ("noplt",
					       DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))))
	  && !SYMBOL_REF_LOCAL_P (x));
}
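/* For example, a call through a declaration such as

     extern void ext_fn (void) __attribute__ ((noplt));

   satisfies this predicate in 64-bit mode and is emitted as

     call *ext_fn@GOTPCREL(%rip)

   loading the target from the GOT instead of going through the PLT.  */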
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
	{
	  if (!CONST_INT_P (XEXP (x, 1)))
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
	return true;
      if (GET_CODE (x) != SYMBOL_REF)
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	  && SYMBOL_REF_DLLIMPORT_P (x))
	return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
	return machopic_symbol_defined_p (x);
#endif

      /* External function address should be loaded
	 via the GOT slot to avoid PLT.  */
      if (ix86_force_load_from_GOT_p (x))
	return false;

      break;

    CASE_CONST_SCALAR_INT:
      if (ix86_endbr_immediate_operand (x, VOIDmode))
	return false;

      switch (mode)
	{
	case E_TImode:
	  if (TARGET_64BIT)
	    return true;
	  /* FALLTHRU */
	case E_OImode:
	case E_XImode:
	  if (!standard_sse_constant_p (x, mode)
	      && GET_MODE_SIZE (TARGET_AVX512F && TARGET_EVEX512
				? XImode
				: (TARGET_AVX
				   ? OImode
				   : (TARGET_SSE2
				      ? TImode : DImode))) < GET_MODE_SIZE (mode))
	    return false;
	default:
	  break;
	}
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x, mode))
	return false;
      break;

    case CONST_DOUBLE:
      if (mode == E_BFmode)
	return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}

/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (machine_mode mode, rtx x)
{
  /* We can put any immediate constant in memory.  */
  switch (GET_CODE (x))
    {
    CASE_CONST_ANY:
      return false;

    default:
      break;
    }

  return !ix86_legitimate_constant_p (mode, x);
}
/* Nonzero if the symbol is marked as dllimport, or as stub-variable,
   otherwise zero.  */

static bool
is_imported_p (rtx x)
{
  if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
      || GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
}

/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
	  && CONST_INT_P (XEXP (inner, 1)))
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    x = XVECEXP (inner, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_MACHOPIC_OFFSET:
	    return legitimate_pic_address_disp_p (x);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
	{
	case LABEL_REF:
	  return true;

	case CONST:
	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
	    break;
	  op0 = XEXP (XEXP (disp, 0), 0);
	  op1 = XEXP (XEXP (disp, 0), 1);
	  if (!CONST_INT_P (op1))
	    break;
	  if (GET_CODE (op0) == UNSPEC
	      && (XINT (op0, 1) == UNSPEC_DTPOFF
		  || XINT (op0, 1) == UNSPEC_NTPOFF)
	      && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
	    return true;
	  if (INTVAL (op1) >= 16*1024*1024
	      || INTVAL (op1) < -16*1024*1024)
	    break;
	  if (GET_CODE (op0) == LABEL_REF)
	    return true;
	  if (GET_CODE (op0) == CONST
	      && GET_CODE (XEXP (op0, 0)) == UNSPEC
	      && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
	    return true;
	  if (GET_CODE (op0) == UNSPEC
	      && XINT (op0, 1) == UNSPEC_PCREL)
	    return true;
	  if (GET_CODE (op0) != SYMBOL_REF)
	    break;
	  /* FALLTHRU */

	case SYMBOL_REF:
	  /* TLS references should always be enclosed in UNSPEC.
	     The dllimported symbol always needs to be resolved.  */
	  if (SYMBOL_REF_TLS_MODEL (op0)
	      || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
		  && SYMBOL_REF_DLLIMPORT_P (op0)))
	    return false;

	  if (TARGET_PECOFF)
	    {
	      if (is_imported_p (op0))
		return true;

	      if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
		break;

	      /* Non-external-weak function symbols need to be resolved only
		 for the large model.  Non-external symbols don't need to be
		 resolved for large and medium models.  For the small model,
		 we don't need to resolve anything here.  */
	      if ((ix86_cmodel != CM_LARGE_PIC
		   && SYMBOL_REF_FUNCTION_P (op0)
		   && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
		  || !SYMBOL_REF_EXTERNAL_P (op0)
		  || ix86_cmodel == CM_SMALL_PIC)
		return true;
	    }
	  else if (!SYMBOL_REF_FAR_ADDR_P (op0)
		   && (SYMBOL_REF_LOCAL_P (op0)
		       || ((ix86_direct_extern_access
			    && !(SYMBOL_REF_DECL (op0)
				 && lookup_attribute ("nodirect_extern_access",
						      DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0)))))
			   && HAVE_LD_PIE_COPYRELOC
			   && flag_pie
			   && !SYMBOL_REF_WEAK (op0)
			   && !SYMBOL_REF_FUNCTION_P (op0)))
		   && ix86_cmodel != CM_LARGE_PIC)
	    return true;
	  break;

	default:
	  break;
	}
    }
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* We cannot safely allow PLUS expressions here, since that would
	 limit the allowed distance of GOT tables.  We should not need
	 these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
	      && XINT (disp, 1) != UNSPEC_GOTOFF
	      && XINT (disp, 1) != UNSPEC_PCREL
	      && XINT (disp, 1) != UNSPEC_PLTOFF))
	return false;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return false;
      return true;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
	return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      /* We need to check for both symbols and labels because VxWorks loads
	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
	 details.  */
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	      || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI specifies a 32bit relocation too, we don't produce
	 it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	  && !TARGET_64BIT)
	return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return false;
}
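/* For example, in 32-bit PIC code the displacement

     (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))

   is accepted here; it corresponds to an operand such as
   x@GOTOFF(%ebx), an offset from the PIC base register.  */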
/* Determine if op is suitable RTX for an address register.
   Return naked register if a register or a register subreg is
   found, otherwise return NULL_RTX.  */

static rtx
ix86_validate_address_register (rtx op)
{
  machine_mode mode = GET_MODE (op);

  /* Only SImode or DImode registers can form the address.  */
  if (mode != SImode && mode != DImode)
    return NULL_RTX;

  if (REG_P (op))
    return op;
  else if (SUBREG_P (op))
    {
      rtx reg = SUBREG_REG (op);

      if (!REG_P (reg))
	return NULL_RTX;

      mode = GET_MODE (reg);

      /* Don't allow SUBREGs that span more than a word.  It can
	 lead to spill failures when the register is one word out
	 of a two word structure.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return NULL_RTX;

      /* Allow only SUBREGs of non-eliminable hard registers.  */
      if (register_no_elim_operand (reg, mode))
	return reg;
    }

  /* Op is not a register.  */
  return NULL_RTX;
}
/* Determine which memory address register set insn can use.  */

static enum attr_addr
ix86_memory_address_reg_class (rtx_insn *insn)
{
  /* LRA can do some initialization with NULL insn,
     return maximum register class in this case.  */
  enum attr_addr addr_rclass = ADDR_GPR32;

  if (!insn)
    return addr_rclass;

  if (asm_noperands (PATTERN (insn)) >= 0
      || GET_CODE (PATTERN (insn)) == ASM_INPUT)
    return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16;

  /* Return maximum register class for unrecognized instructions.  */
  if (INSN_CODE (insn) < 0)
    return addr_rclass;

  /* Try to recognize the insn before calling get_attr_addr.
     Save current recog_data and current alternative.  */
  struct recog_data_d saved_recog_data = recog_data;
  int saved_alternative = which_alternative;

  /* Update recog_data for processing of alternatives.  */
  extract_insn_cached (insn);

  /* If the current alternative is not set, loop through the enabled
     alternatives and get the most limited register class.  */
  if (saved_alternative == -1)
    {
      alternative_mask enabled = get_enabled_alternatives (insn);

      for (int i = 0; i < recog_data.n_alternatives; i++)
	{
	  if (!TEST_BIT (enabled, i))
	    continue;

	  which_alternative = i;
	  addr_rclass = MIN (addr_rclass, get_attr_addr (insn));
	}
    }
  else
    {
      which_alternative = saved_alternative;
      addr_rclass = get_attr_addr (insn);
    }

  recog_data = saved_recog_data;
  which_alternative = saved_alternative;

  return addr_rclass;
}
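/* For instance, a recognized insn whose enabled alternatives all allow
   ADDR_GPR32 may use the APX extended GPRs (%r16-%r31) in its memory
   operand, while inline asm is limited to ADDR_GPR16 unless
   -mapx-inline-asm-use-gpr32 promises that the asm template is
   EGPR-safe.  */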
/* Return memory address register class insn can use.  */

enum reg_class
ix86_insn_base_reg_class (rtx_insn *insn)
{
  switch (ix86_memory_address_reg_class (insn))
    {
    case ADDR_GPR8:
      return LEGACY_GENERAL_REGS;
    case ADDR_GPR16:
      return GENERAL_GPR16;
    case ADDR_GPR32:
      break;
    default:
      gcc_unreachable ();
    }

  return BASE_REG_CLASS;
}

bool
ix86_regno_ok_for_insn_base_p (int regno, rtx_insn *insn)
{
  switch (ix86_memory_address_reg_class (insn))
    {
    case ADDR_GPR8:
      return LEGACY_INT_REGNO_P (regno);
    case ADDR_GPR16:
      return GENERAL_GPR16_REGNO_P (regno);
    case ADDR_GPR32:
      break;
    default:
      gcc_unreachable ();
    }

  return GENERAL_REGNO_P (regno);
}

enum reg_class
ix86_insn_index_reg_class (rtx_insn *insn)
{
  switch (ix86_memory_address_reg_class (insn))
    {
    case ADDR_GPR8:
      return LEGACY_INDEX_REGS;
    case ADDR_GPR16:
      return INDEX_GPR16;
    case ADDR_GPR32:
      break;
    default:
      gcc_unreachable ();
    }

  return INDEX_REG_CLASS;
}
/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

static bool
ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
			   code_helper = ERROR_MARK)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  addr_space_t seg;

  if (ix86_decompose_address (addr, &parts) == 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;
  seg = parts.seg;

  /* Validate base register.  */
  if (base)
    {
      rtx reg = ix86_validate_address_register (base);

      if (reg == NULL_RTX)
	return false;

      unsigned int regno = REGNO (reg);
      if ((strict && !REGNO_OK_FOR_BASE_P (regno))
	  || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno)))
	/* Base is not valid.  */
	return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg = ix86_validate_address_register (index);

      if (reg == NULL_RTX)
	return false;

      unsigned int regno = REGNO (reg);
      if ((strict && !REGNO_OK_FOR_INDEX_P (regno))
	  || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno)))
	/* Index is not valid.  */
	return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Address override works only on the (%reg) part of %fs:(%reg).  */
  if (seg != ADDR_SPACE_GENERIC
      && ((base && GET_MODE (base) != word_mode)
	  || (index && GET_MODE (index) != word_mode)))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
	/* Scale without index.  */
	return false;

      if (scale != 2 && scale != 4 && scale != 8)
	/* Scale is not a valid multiplier.  */
	return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      if (ix86_endbr_immediate_operand (disp, VOIDmode))
	return false;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	     when used.  While the ABI specifies 32bit relocations too, we
	     don't produce them at all and use IP-relative addressing
	     instead.  Allow GOT in 32bit mode for both PIC and non-PIC if
	     the symbol should be loaded via GOT.  */
	  case UNSPEC_GOT:
	    if (!TARGET_64BIT
		&& ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	      goto is_legitimate_pic;
	    /* FALLTHRU */
	  case UNSPEC_GOTOFF:
	    gcc_assert (flag_pic);
	    if (!TARGET_64BIT)
	      goto is_legitimate_pic;

	    /* 64bit address unspec.  */
	    return false;

	  case UNSPEC_GOTPCREL:
	    if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	      goto is_legitimate_pic;
	    /* FALLTHRU */
	  case UNSPEC_PCREL:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  default:
	    /* Invalid address unspec.  */
	    return false;
	  }

      else if (SYMBOLIC_CONST (disp)
	       && (flag_pic
#if TARGET_MACHO
		   || (MACHOPIC_INDIRECT
		       && !machopic_operand_p (disp))
#endif
		   ))
	{
	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		/* Non-constant pic memory reference.  */
		return false;
	    }
	  else if ((!TARGET_MACHO || flag_pic)
		   && ! legitimate_pic_address_disp_p (disp))
	    /* Displacement is an invalid pic construct.  */
	    return false;
#if TARGET_MACHO
	  else if (MACHO_DYNAMIC_NO_PIC_P
		   && !ix86_legitimate_constant_p (Pmode, disp))
	    /* Displacement must be referenced via non_lazy_pointer.  */
	    return false;
#endif

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is a good idea, unfortunately these constructs may
	     be created by the "adds using lea" optimization for incorrect
	     source code such as *(&a + i).  This code is nonsensical, but
	     results in addressing the GOT table with a pic_offset_table_rtx
	     base.  We can't just refuse it easily, since it gets matched by
	     the "addsi3" pattern, which later gets split to lea in the
	     case the output register differs from the input.  While this
	     can be handled by a separate addsi pattern for this case
	     that never results in lea, this seems to be the easier and
	     correct fix for the crash: disable this test.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && !CONST_INT_P (disp)
	       && (GET_CODE (disp) != CONST
		   || !ix86_legitimate_constant_p (Pmode, disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !ix86_legitimate_constant_p (Pmode, disp)))
	/* Displacement is not constant.  */
	return false;
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	/* Displacement is out of range.  */
	return false;
      /* In x32 mode, constant addresses are sign extended to 64bit, so
	 we have to prevent addresses from 0x80000000 to 0xffffffff.  */
      else if (TARGET_X32 && !(index || base)
	       && CONST_INT_P (disp)
	       && val_signbit_known_set_p (SImode, INTVAL (disp)))
	return false;
    }

  /* Everything looks valid.  */
  return true;
}
/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
}

/* Return a unique alias set for the GOT.  */

alias_set_type
ix86_GOT_alias_set (void)
{
  static alias_set_type set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      used as the address of the data.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
	return tmp;
    }

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new_rtx = addr;
  else if ((!TARGET_64BIT
	    || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
	   && !TARGET_PECOFF
	   && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement
	 from the PIC base address (@GOTOFF).  */
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);

      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);

      new_rtx = gen_rtx_CONST (Pmode, new_rtx);

      if (TARGET_64BIT)
	new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);

      if (reg != 0)
	{
	  gcc_assert (REG_P (reg));
	  new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
					 new_rtx, reg, 1, OPTAB_DIRECT);
	}
      else
	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
    }
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
	   /* We can't always use @GOTOFF for text labels
	      on VxWorks, see gotoff_operand.  */
	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
    {
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
	return tmp;

      /* For x64 PE-COFF there is no GOT table,
	 so we use address directly.  */
      if (TARGET_64BIT && TARGET_PECOFF)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	}
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
				    UNSPEC_GOTPCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
	}
      else
	{
	  /* This symbol must be referenced via a load
	     from the Global Offset Table (@GOT).  */
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

	  if (TARGET_64BIT)
	    new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);

	  if (reg != 0)
	    {
	      gcc_assert (REG_P (reg));
	      new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
					     new_rtx, reg, 1, OPTAB_DIRECT);
	    }
	  else
	    new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
	}

      new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
    }
  else
    {
      if (CONST_INT_P (addr)
	  && !x86_64_immediate_operand (addr, VOIDmode))
	new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
      else if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  gcc_assert (GET_CODE (addr) == PLUS);
	}

      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant
	     offset from a @GOTOFF symbol reference.  */
	  if (!TARGET_PECOFF
	      && gotoff_operand (op0, Pmode)
	      && CONST_INT_P (op1))
	    {
	      if (!TARGET_64BIT)
		{
		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					    UNSPEC_GOTOFF);
		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

		  if (reg != 0)
		    {
		      gcc_assert (REG_P (reg));
		      new_rtx = expand_simple_binop (Pmode, PLUS,
						     pic_offset_table_rtx,
						     new_rtx, reg, 1,
						     OPTAB_DIRECT);
		    }
		  else
		    new_rtx
		      = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
		}
	      else
		{
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (op1, Pmode))
			op1 = force_reg (Pmode, op1);

		      new_rtx
			= gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
		    }
		}
	    }
	  else
	    {
	      rtx base = legitimize_pic_address (op0, reg);
	      machine_mode mode = GET_MODE (base);
	      new_rtx
		= legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);

	      if (CONST_INT_P (new_rtx))
		{
		  if (INTVAL (new_rtx) < -16*1024*1024
		      || INTVAL (new_rtx) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (new_rtx, mode))
			new_rtx = force_reg (mode, new_rtx);

		      new_rtx
			= gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
		    }
		  else
		    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
		}
	      else
		{
		  /* For %rip addressing, we have to use
		     just disp32, not base nor index.  */
		  if (TARGET_64BIT
		      && (GET_CODE (base) == SYMBOL_REF
			  || GET_CODE (base) == LABEL_REF))
		    base = force_reg (mode, base);
		  if (GET_CODE (new_rtx) == PLUS
		      && CONSTANT_P (XEXP (new_rtx, 1)))
		    {
		      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
		      new_rtx = XEXP (new_rtx, 1);
		    }
		  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
		}
	    }
	}
    }
  return new_rtx;
}
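/* For example, on ia32 with -fpic, a local symbol (case 2 above) is
   addressed as an offset from the PIC register:

     leal x@GOTOFF(%ebx), %eax

   while a global symbol (case 1 above) is loaded from the GOT:

     movl x@GOT(%ebx), %eax

   matching the UNSPEC_GOTOFF and UNSPEC_GOT forms built here.  */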
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (machine_mode tp_mode, bool to_reg)
{
  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  if (GET_MODE (tp) != tp_mode)
    {
      gcc_assert (GET_MODE (tp) == SImode);
      gcc_assert (tp_mode == DImode);

      tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
    }

  if (to_reg)
    tp = copy_to_mode_reg (tp_mode, tp);

  return tp;
}
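/* The UNSPEC_TP built here is what eventually prints as the
   thread-pointer segment register: %fs for 64-bit and %gs for 32-bit
   code on GNU/Linux, so a TP-relative access looks like
   movq %fs:0, %rax.  */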
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      const char *sym
	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
	   ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
    {
      rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
				   UNSPEC_PLTOFF);
      return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
			   gen_rtx_CONST (Pmode, unspec));
    }

  return ix86_tls_symbol;
}

/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol
	= gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");

      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}
/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  machine_mode tp_mode = Pmode;
  int type;

  /* Fall back to the global dynamic model if the tool chain cannot support
     local dynamic.  */
  if (TARGET_SUN_TLS && !TARGET_64BIT
      && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
      && model == TLS_MODEL_LOCAL_DYNAMIC)
    model = TLS_MODEL_GLOBAL_DYNAMIC;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (!TARGET_64BIT)
	{
	  if (flag_pic && !TARGET_PECOFF)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  dest = gen_reg_rtx (ptr_mode);
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

	  tp = get_thread_pointer (ptr_mode, true);
	  dest = gen_rtx_PLUS (ptr_mode, tp, dest);
	  if (GET_MODE (dest) != Pmode)
	    dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
	  dest = force_reg (Pmode, dest);

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  dest = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx_insn *insns;

	      start_sequence ();
	      emit_call_insn
		(gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
	      insns = get_insns ();
	      end_sequence ();

	      if (GET_MODE (x) != Pmode)
		x = gen_rtx_ZERO_EXTEND (Pmode, x);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
	}
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  rtx tmp = ix86_tls_module_base ();

	  base = gen_reg_rtx (ptr_mode);
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

	  tp = get_thread_pointer (ptr_mode, true);
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_ZERO_EXTEND (Pmode, base);
	  base = force_reg (Pmode, base);
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  base = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx_insn *insns;
	      rtx eqv;

	      start_sequence ();
	      emit_call_insn
		(gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
	      insns = get_insns ();
	      end_sequence ();

	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
		 share the LD_BASE result with other LD model accesses.  */
	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				    UNSPEC_TLS_LD_BASE);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, base, rax, eqv);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  if (GET_MODE (tp) != Pmode)
	    {
	      dest = lowpart_subreg (ptr_mode, dest, Pmode);
	      dest = gen_rtx_PLUS (ptr_mode, tp, dest);
	      dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
	    }
	  else
	    dest = gen_rtx_PLUS (Pmode, tp, dest);
	  dest = force_reg (Pmode, dest);

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  if (TARGET_SUN_TLS && !TARGET_X32)
	    {
	      /* The Sun linker took the AMD64 TLS spec literally
		 and can only handle %rax as destination of the
		 initial executable code sequence.  */

	      dest = gen_reg_rtx (DImode);
	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
	      return dest;
	    }

	  /* Generate DImode references to avoid %fs:(%reg32)
	     problems and linker IE->LE relaxation bug.  */
	  tp_mode = DImode;
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
	off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (tp_mode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (tp_mode, off);
	  dest = gen_rtx_PLUS (tp_mode, base, off);
	  if (tp_mode != Pmode)
	    dest = convert_to_mode (Pmode, dest, 1);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_sub3_insn (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (Pmode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_sub3_insn (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
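/* As a user-level illustration, given

     static __thread int counter;

   the local-exec model addresses it directly off the thread pointer:

     movl %fs:counter@tpoff, %eax

   while the initial-exec model first loads the offset from the GOT:

     movq counter@gottpoff(%rip), %rax
     movl %fs:(%rax), %eax  */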
/* Return true if the TLS address requires an insn using integer registers.
   It's used to prevent KMOV/VMOV in TLS code sequences which require integer
   MOV instructions; refer to PR103275.  */

bool
ix86_gpr_tls_address_pattern_p (rtx mem)
{
  gcc_assert (MEM_P (mem));

  rtx addr = XEXP (mem, 0);
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL)
    {
      rtx op = *iter;
      if (GET_CODE (op) == UNSPEC)
	switch (XINT (op, 1))
	  {
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_TP:
	    return true;
	  default:
	    break;
	  }
    }

  return false;
}

/* Return true if OP refers to a TLS address.  */

bool
ix86_tls_address_pattern_p (rtx op)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
    {
      rtx op = *iter;
      if (MEM_P (op))
	{
	  rtx *x = &XEXP (op, 0);
	  while (GET_CODE (*x) == PLUS)
	    {
	      int i;
	      for (i = 0; i < 2; i++)
		{
		  rtx u = XEXP (*x, i);
		  if (GET_CODE (u) == ZERO_EXTEND)
		    u = XEXP (u, 0);
		  if (GET_CODE (u) == UNSPEC
		      && XINT (u, 1) == UNSPEC_TP)
		    return true;
		}
	      x = &XEXP (*x, 0);
	    }

	  iter.skip_subrtxes ();
	}
    }

  return false;
}

/* Rewrite *LOC so that it refers to a default TLS address space.  */

static void
ix86_rewrite_tls_address_1 (rtx *loc)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
    {
      rtx *loc = *iter;
      if (MEM_P (*loc))
	{
	  rtx addr = XEXP (*loc, 0);
	  rtx *x = &addr;
	  while (GET_CODE (*x) == PLUS)
	    {
	      int i;
	      for (i = 0; i < 2; i++)
		{
		  rtx u = XEXP (*x, i);
		  if (GET_CODE (u) == ZERO_EXTEND)
		    u = XEXP (u, 0);
		  if (GET_CODE (u) == UNSPEC
		      && XINT (u, 1) == UNSPEC_TP)
		    {
		      addr_space_t as = DEFAULT_TLS_SEG_REG;

		      *x = XEXP (*x, 1 - i);

		      *loc = replace_equiv_address_nv (*loc, addr, true);
		      set_mem_addr_space (*loc, as);
		      return;
		    }
		}
	      x = &XEXP (*x, 0);
	    }

	  iter.skip_subrtxes ();
	}
    }
}

/* Rewrite an instruction pattern involving a TLS address
   so that it refers to a default TLS address space.  */

rtx
ix86_rewrite_tls_address (rtx pattern)
{
  pattern = copy_insn (pattern);
  ix86_rewrite_tls_address_1 (&pattern);
  return pattern;
}
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL if BEIMPORT is true.  Otherwise create or return the
   unique refptr-DECL symbol corresponding to symbol DECL.  */

struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
{
  static inline hashval_t hash (tree_map *m) { return m->hash; }

  static inline bool
  equal (tree_map *a, tree_map *b)
  {
    return a->base.from == b->base.from;
  }

  static int
  keep_cache_entry (tree_map *&m)
  {
    return ggc_marked_p (m->base.from);
  }
};

static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;

static tree
get_dllimport_decl (tree decl, bool beimport)
{
  struct tree_map *h, in;
  const char *name;
  const char *prefix;
  size_t namelen, prefixlen;
  char *imp_name;
  tree to;
  rtx rtl;

  if (!dllimport_map)
    dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);

  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
  h = *loc;
  if (h)
    return h->to;

  *loc = h = ggc_alloc<tree_map> ();
  h->hash = in.hash;
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
			   VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  if (beimport)
    prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
      ? "*__imp_" : "*__imp__";
  else
    prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
  namelen = strlen (name);
  prefixlen = strlen (prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (imp_name, prefix, prefixlen);
  memcpy (imp_name + prefixlen, name, namelen + 1);

  name = ggc_alloc_string (imp_name, namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
  if (!beimport)
    {
      SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
#ifdef SUB_TARGET_RECORD_STUB
      SUB_TARGET_RECORD_STUB (name);
#endif
    }

  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));

  return to;
}
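/* For example, a reference to

     __declspec (dllimport) extern int x;

   is redirected through the "*__imp_" stub built above; on x86-64 that
   expands to

     movq __imp_x(%rip), %rax
     movl (%rax), %eax  */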
/* Expand SYMBOL into its corresponding far-address symbol.
   WANT_REG is true if we require the result be a register.  */

static rtx
legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}

/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */

static rtx
legitimize_dllimport_symbol (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}

/* Expand SYMBOL into its corresponding dllimport or refptr symbol.  WANT_REG
   is true if we require the result be a register.  */

rtx
legitimize_pe_coff_symbol (rtx addr, bool inreg)
{
  if (!TARGET_PECOFF)
    return NULL_RTX;

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
	return legitimize_dllimport_symbol (addr, inreg);
      if (GET_CODE (addr) == CONST
	  && GET_CODE (XEXP (addr, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
	{
	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0),
					       inreg);
	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
	}
    }

  if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
    return NULL_RTX;
  if (GET_CODE (addr) == SYMBOL_REF
      && !is_imported_p (addr)
      && SYMBOL_REF_EXTERNAL_P (addr)
      && SYMBOL_REF_DECL (addr))
    return legitimize_pe_coff_extern_decl (addr, inreg);

  if (GET_CODE (addr) == CONST
      && GET_CODE (XEXP (addr, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
      && !is_imported_p (XEXP (XEXP (addr, 0), 0))
      && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
      && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
    {
      rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0),
					      inreg);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
    }
  return NULL_RTX;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.cc.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.cc for details.  */

static rtx
ix86_legitimize_address (rtx x, rtx, machine_mode mode)
{
  bool changed = false;
  unsigned log;

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      rtx tmp = legitimize_pe_coff_symbol (x, true);
      if (tmp)
	return tmp;
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = true;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = true;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode,
						 XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = true;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode,
						 XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  std::swap (XEXP (x, 0), XEXP (x, 1));
	  changed = true;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = true;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = true;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (Pmode, other,
					       INTVAL (constant)));
	    }
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = true;
	  XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = true;
	  XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
	}

      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = true;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (REG_P (XEXP (x, 0)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (REG_P (XEXP (x, 1)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
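/* For example, (plus (ashift (reg) (const_int 2)) (reg)) is
   canonicalized above into (plus (mult (reg) (const_int 4)) (reg)),
   which then matches the scaled-index addressing form
   (%base,%index,4) accepted by ix86_legitimate_address_p.  */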
12857 /* Print an integer constant expression in assembler syntax. Addition
12858 and subtraction are the only arithmetic that may appear in these
12859 expressions. FILE is the stdio stream to write to, X is the rtx, and
12860 CODE is the operand print code from the output string. */
12863 output_pic_addr_const (FILE *file
, rtx x
, int code
)
12867 switch (GET_CODE (x
))
12870 gcc_assert (flag_pic
);
12875 if (TARGET_64BIT
|| ! TARGET_MACHO_SYMBOL_STUBS
)
12876 output_addr_const (file
, x
);
12879 const char *name
= XSTR (x
, 0);
12881 /* Mark the decl as referenced so that cgraph will
12882 output the function. */
12883 if (SYMBOL_REF_DECL (x
))
12884 mark_decl_referenced (SYMBOL_REF_DECL (x
));
12887 if (MACHOPIC_INDIRECT
12888 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
12889 name
= machopic_indirection_name (x
, /*stub_p=*/true);
12891 assemble_name (file
, name
);
12893 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& TARGET_PECOFF
)
12894 && code
== 'P' && ix86_call_use_plt_p (x
))
12895 fputs ("@PLT", file
);
12902 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
12903 assemble_name (asm_out_file
, buf
);
12906 CASE_CONST_SCALAR_INT
:
12907 output_addr_const (file
, x
);
12911 /* This used to output parentheses around the expression,
12912 but that does not work on the 386 (either ATT or BSD assembler). */
12913 output_pic_addr_const (file
, XEXP (x
, 0), code
);
12917 /* We can't handle floating point constants;
12918 TARGET_PRINT_OPERAND must handle them. */
12919 output_operand_lossage ("floating constant misused");
12923 /* Some assemblers need integer constants to appear first. */
12924 if (CONST_INT_P (XEXP (x
, 0)))
12926 output_pic_addr_const (file
, XEXP (x
, 0), code
);
12928 output_pic_addr_const (file
, XEXP (x
, 1), code
);
12932 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
12933 output_pic_addr_const (file
, XEXP (x
, 1), code
);
12935 output_pic_addr_const (file
, XEXP (x
, 0), code
);
12941 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
12942 output_pic_addr_const (file
, XEXP (x
, 0), code
);
12944 output_pic_addr_const (file
, XEXP (x
, 1), code
);
12946 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
12950 gcc_assert (XVECLEN (x
, 0) == 1);
12951 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
12952 switch (XINT (x
, 1))
12955 fputs ("@GOT", file
);
12957 case UNSPEC_GOTOFF
:
12958 fputs ("@GOTOFF", file
);
12960 case UNSPEC_PLTOFF
:
12961 fputs ("@PLTOFF", file
);
12964 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
12965 "(%rip)" : "[rip]", file
);
12967 case UNSPEC_GOTPCREL
:
12968 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
12969 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
12971 case UNSPEC_GOTTPOFF
:
12972 /* FIXME: This might be @TPOFF in Sun ld too. */
12973 fputs ("@gottpoff", file
);
12976 fputs ("@tpoff", file
);
12978 case UNSPEC_NTPOFF
:
12980 fputs ("@tpoff", file
);
12982 fputs ("@ntpoff", file
);
12984 case UNSPEC_DTPOFF
:
12985 fputs ("@dtpoff", file
);
12987 case UNSPEC_GOTNTPOFF
:
12989 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
12990 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
12992 fputs ("@gotntpoff", file
);
12994 case UNSPEC_INDNTPOFF
:
12995 fputs ("@indntpoff", file
);
12998 case UNSPEC_MACHOPIC_OFFSET
:
13000 machopic_output_function_base_name (file
);
13004 output_operand_lossage ("invalid UNSPEC as operand");
13010 output_operand_lossage ("invalid expression as operand");
/* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void ATTRIBUTE_UNUSED
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@dtpoff", file);
      fputs (", 0", file);
      gcc_unreachable ();
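/* Sketch of the emitted directive ("foo" is a placeholder symbol): a
   4-byte entry comes out as ".long foo@dtpoff", while the ", 0" branch
   above pads the 8-byte case to ".long foo@dtpoff, 0".  */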
/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.  */

ix86_pic_register_p (rtx x)
  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
    return (pic_offset_table_rtx
	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
  else if (!REG_P (x))
  else if (pic_offset_table_rtx)
      if (REGNO (x) == REGNO (pic_offset_table_rtx))
      if (HARD_REGISTER_P (x)
	  && !HARD_REGISTER_P (pic_offset_table_rtx)
	  && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
    return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses.  */

ix86_delegitimize_tls_address (rtx orig_x)
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  if (!TARGET_TLS_DIRECT_SEG_REFS)
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != DEFAULT_TLS_SEG_REG
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
  unspec = XEXP (addr.disp, 0);
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));

      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);

    x = gen_rtx_PLUS (Pmode, addr.base, x);
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
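/* Illustrative example (hypothetical symbol): a TLS local-exec access
   such as %fs:foo@tpoff is represented as an address whose displacement
   wraps foo in an UNSPEC_NTPOFF; the code above strips the segment and
   the unspec and hands back the bare SYMBOL_REF (plus any base, index
   and constant offset), which is what the debug-info consumers want.  */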
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.

   This helper is used in the normal ix86_delegitimize_address
   entrypoint (e.g. used in the target delegitimization hook) and
   in ix86_find_base_term.  As a compile-time memory optimization, we
   avoid allocating rtxes that will not change anything on the outcome
   of the callers (find_base_value and find_base_term).  */

ix86_delegitimize_address_1 (rtx x, bool base_term_p)
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_MODE (XEXP (x, 0)) == Pmode
      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
      && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
      /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
	 base.  A CONST can't be arg_pointer_rtx based.  */
      if (base_term_p && MEM_P (orig_x))
      rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
      x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
      if (MEM_P (orig_x))
	x = replace_equiv_address_nv (orig_x, x);

  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
	  || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
      && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
      x = XVECEXP (XEXP (x, 0), 0, 0);
      if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
	x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));

  if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
    return ix86_delegitimize_tls_address (orig_x);

  /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
     and -mcmodel=medium -fpic.  */

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
	reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
	reg_addend = XEXP (reg_addend, 0);
	  reg_addend = NULL_RTX;
	  addend = XEXP (x, 0);
    addend = XEXP (x, 0);

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
      const_addend = XEXP (x, 1);

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
	  || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
	      && !MEM_P (orig_x) && !addend)))
    result = XVECEXP (x, 0, 0);

  if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

    return ix86_delegitimize_tls_address (orig_x);

  /* For (PLUS something CONST_INT) both find_base_{value,term} just
     recurse on the first operand.  */
  if (const_addend && !base_term_p)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
    result = gen_rtx_PLUS (Pmode, reg_addend, result);

  /* If the rest of original X doesn't involve the PIC register, add
     addend and subtract pic_offset_table_rtx.  This can happen e.g.
     for
	leal (%ebx, %ecx, 4), %ecx
	...
	movl foo@GOTOFF(%ecx), %edx
     in which case we return (%ecx - %ebx) + foo
     or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
     and reload has completed.  Don't do the latter for debug,
     as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly.  */
  if (pic_offset_table_rtx
      && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
    result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
						 pic_offset_table_rtx),
			   result);
  else if (base_term_p
	   && pic_offset_table_rtx
	   && !TARGET_VXWORKS_RTP)
      rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
      tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
      result = gen_rtx_PLUS (Pmode, tmp, result);

  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
      result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
      if (result == NULL_RTX)
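/* Illustrative example (hypothetical symbol): for 32-bit PIC code the
   address (plus (reg ebx) (const (unspec [(symbol_ref "foo")]
   UNSPEC_GOTOFF))) delegitimizes back to the plain (symbol_ref "foo"),
   which keeps the debug info free of PIC-label artifacts.  */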
/* The normal instantiation of the above template.  */

ix86_delegitimize_address (rtx x)
  return ix86_delegitimize_address_1 (x, false);
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

ix86_find_base_term (rtx x)
  if (GET_CODE (x) != CONST)
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && CONST_INT_P (XEXP (term, 1)))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || (XINT (term, 1) != UNSPEC_GOTPCREL
	      && XINT (term, 1) != UNSPEC_PCREL))
      return XVECEXP (term, 0, 0);

  return ix86_delegitimize_address_1 (x, true);
/* Return true if X shouldn't be emitted into the debug info.
   Disallow UNSPECs other than @gotoff - we can't emit the
   _GLOBAL_OFFSET_TABLE_ symbol easily into the .debug_info section,
   so we need not delegitimize, but instead assemble as @gotoff.
   Disallow the _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
   assembles that as a _GLOBAL_OFFSET_TABLE_-. expression.  */

ix86_const_not_ok_for_debug_p (rtx x)
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
  if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
		    bool fp, FILE *file)
  const char *suffix;

  if (mode == CCFPmode)
      code = ix86_fp_compare_code_to_integer (code);

    code = reverse_condition (code);

      gcc_assert (mode != CCGZmode);
      gcc_assert (mode != CCGZmode);
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
	suffix = fp ? "nbe" : "a";
	gcc_unreachable ();
      gcc_unreachable ();
      if (mode == CCmode || mode == CCGZmode)
      else if (mode == CCCmode)
	suffix = fp ? "b" : "c";
	gcc_unreachable ();
      gcc_unreachable ();
      if (mode == CCmode || mode == CCGZmode)
      else if (mode == CCCmode)
	suffix = fp ? "nb" : "nc";
	gcc_unreachable ();
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      if (mode == CCmode)
	gcc_unreachable ();
      suffix = fp ? "u" : "p";
      suffix = fp ? "nu" : "np";
      gcc_unreachable ();
  fputs (suffix, file);
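/* For instance, when the caller pairs the suffix with a "set" or "cmov"
   mnemonic, the "a" selected above yields "seta"/"cmova" (unsigned
   above), while its fp counterpart "nbe" sidesteps the assembler quirk
   noted in the comment.  */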
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'g', pretend the mode is V16SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.
   If CODE is 'V', print naked full integer register name without %.  */

print_reg (rtx x, int code, FILE *file)
  unsigned int regno;

  if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);

  if (code == 'y' && STACK_TOP_P (x))
      fputs ("st(0)", file);
  else if (code == 'b')
  else if (code == 'k')
  else if (code == 'q')
  else if (code == 'h')
  else if (code == 'x')
  else if (code == 't')
  else if (code == 'g')
    msize = GET_MODE_SIZE (GET_MODE (x));

  if (regno == ARG_POINTER_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == FPSR_REG)
      output_operand_lossage
	("invalid use of register '%s'", reg_names[regno]);
  else if (regno == FLAGS_REG)
      output_operand_lossage ("invalid use of asm flag output");

      if (GENERAL_REGNO_P (regno))
	msize = GET_MODE_SIZE (word_mode);
	error ("%<V%> modifier on non-integer register");

  duplicated = code == 'd' && TARGET_AVX;

      if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
	warning (0, "unsupported size for integer register");

      if (LEGACY_INT_REGNO_P (regno))
	putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
      reg = hi_reg_name[regno];
      if (regno >= ARRAY_SIZE (qi_reg_name))
	  if (!ANY_QI_REGNO_P (regno))
	    error ("unsupported size for integer register");
      reg = qi_reg_name[regno];
      if (regno >= ARRAY_SIZE (qi_high_reg_name))
      reg = qi_high_reg_name[regno];
      if (SSE_REGNO_P (regno))
	  gcc_assert (!duplicated);
	  putc (msize == 32 ? 'y' : 'z', file);
	  reg = hi_reg_name[regno] + 1;
      gcc_unreachable ();

  /* Irritatingly, AMD extended registers use
     different naming convention: "r%d[bwd]" */
  if (REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
      gcc_assert (TARGET_64BIT);
	  error ("extended registers have no high halves");
	  error ("unsupported operand size for extended register");

      if (ASSEMBLER_DIALECT == ASM_ATT)
	fprintf (file, ", %%%s", reg);
	fprintf (file, ", %s", reg);
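/* For illustration (register choice hypothetical): via the operand
   modifiers documented below, %b on the ax register prints "al" and %q
   prints "rax"; with the AVX 'd' code the duplication branch above
   prints the register twice, e.g. "%xmm0, %xmm0", which three-operand
   AVX templates use to repeat a source operand.  */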
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
   R -- print embedded rounding and sae.
   r -- print only sae.
   z -- print the opcode suffix for the size of the current operand.
   Z -- likewise, with special suffixes for x87 instructions.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   E -- print address with DImode register names if TARGET_64BIT.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   x -- likewise, print the V4SFmode name of the register.
   t -- likewise, print the V8SFmode name of the register.
   g -- likewise, print the V16SFmode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   d -- print duplicated register operand for AVX instruction.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.  For -fno-plt, load function
   p -- print raw symbol name.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
   Y -- print condition for XOP pcom* instruction.
   V -- print naked full integer register name without %.
   + -- print a branch hint as 'cs' or 'ds' prefix
   ; -- print a semicolon (after prefixes due to bug in older gas).
   ~ -- print "i" if TARGET_AVX2, "f" otherwise.
   ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
   M -- print addr32 prefix for TARGET_X32 with VSIB address.
   ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
   N -- print maskz if it's constant 0 operand.  */
ix86_print_operand (FILE *file, rtx x, int code)
      switch (ASSEMBLER_DIALECT)
	  /* Intel syntax.  For absolute addresses, registers should not
	     be surrounded by braces.  */
	      ix86_print_operand (file, x, 0);
	  gcc_unreachable ();

	  ix86_print_operand (file, x, 0);
	  /* Wrap address in an UNSPEC to declare special handling.  */
	    x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
	  output_address (VOIDmode, x);

	  if (ASSEMBLER_DIALECT == ASM_ATT)
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	  if (ASSEMBLER_DIALECT == ASM_ATT)

#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT != ASM_ATT)
	      switch (GET_MODE_SIZE (GET_MODE (x)))
		  output_operand_lossage ("invalid operand size for operand "

	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	      /* Opcodes don't get size suffixes if using Intel opcodes.  */
	      if (ASSEMBLER_DIALECT == ASM_INTEL)
	      switch (GET_MODE_SIZE (GET_MODE (x)))
		  output_operand_lossage ("invalid operand size for operand "

	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	      if (this_is_asm_operands)
		warning_for_asm (this_is_asm_operands,
				 "non-integer operand used with operand code %<z%>");
		warning (0, "non-integer operand used with operand code %<z%>");

	  /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)

	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	      switch (GET_MODE_SIZE (GET_MODE (x)))
#ifdef HAVE_AS_IX86_FILDS
#ifdef HAVE_AS_IX86_FILDQ
		  fputs ("ll", file);
	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	      /* 387 opcodes don't get size suffixes
		 if the operands are registers.  */
	      if (STACK_REG_P (x))
	      switch (GET_MODE_SIZE (GET_MODE (x)))
		  output_operand_lossage ("invalid operand type used with "
					  "operand code '%c'", code);
	  output_operand_lossage ("invalid operand size for operand code '%c'",
	  if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
	      ix86_print_operand (file, x, 0);
	      fputs (", ", file);

	  switch (GET_CODE (x))
	      fputs ("neq", file);
	      fputs ("eq", file);
	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
	      fputs ("le", file);
	      fputs ("lt", file);
	      fputs ("unord", file);
	      fputs ("ord", file);
	      fputs ("ueq", file);
	      fputs ("nlt", file);
	      fputs ("nle", file);
	      fputs ("ule", file);
	      fputs ("ult", file);
	      fputs ("une", file);
	      output_operand_lossage ("operand is not a condition code, "
				      "invalid operand code 'Y'");

	  /* A little bit of braindamage here.  The SSE compare instructions
	     use completely different names for the comparisons than the
	     fp conditional moves.  */
	  switch (GET_CODE (x))
	      fputs ("eq_us", file);
	      fputs ("eq", file);
	      fputs ("nge", file);
	      fputs ("lt", file);
	      fputs ("ngt", file);
	      fputs ("le", file);
	      fputs ("unord", file);
	      fputs ("neq_oq", file);
	      fputs ("neq", file);
	      fputs ("ge", file);
	      fputs ("nlt", file);
	      fputs ("gt", file);
	      fputs ("nle", file);
	      fputs ("ord", file);
	      output_operand_lossage ("operand is not a condition code, "
				      "invalid operand code 'D'");
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	  gcc_fallthrough ();

	  if (!COMPARISON_P (x))
	      output_operand_lossage ("operand is not a condition code, "
				      "invalid operand code '%c'", code);
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
			      code == 'c' || code == 'f',
			      code == 'F' || code == 'f',

	  if (!offsettable_memref_p (x))
	      output_operand_lossage ("operand is not an offsettable memory "
				      "reference, invalid operand code 'H'");
	  /* It doesn't actually matter what mode we use here, as we're
	     only going to use this for printing.  */
	  x = adjust_address_nv (x, DImode, 8);
	  /* Output 'qword ptr' for intel assembler dialect.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)

	  if (!CONST_INT_P (x))
	      output_operand_lossage ("operand is not an integer, invalid "
				      "operand code 'K'");

	  if (INTVAL (x) & IX86_HLE_ACQUIRE)
#ifdef HAVE_AS_IX86_HLE
	    fputs ("xacquire ", file);
	    fputs ("\n" ASM_BYTE "0xf2\n\t", file);
	  else if (INTVAL (x) & IX86_HLE_RELEASE)
#ifdef HAVE_AS_IX86_HLE
	    fputs ("xrelease ", file);
	    fputs ("\n" ASM_BYTE "0xf3\n\t", file);
	  /* We do not want to print value of the operand.  */

	  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
	    fputs ("{z}", file);
	  if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
	      output_operand_lossage ("operand is not a specific integer, "
				      "invalid operand code 'r'");
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    fputs (", ", file);
	  fputs ("{sae}", file);
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    fputs (", ", file);

	  if (!CONST_INT_P (x))
	      output_operand_lossage ("operand is not an integer, invalid "
				      "operand code 'R'");
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    fputs (", ", file);
	  switch (INTVAL (x))
	    case ROUND_NEAREST_INT | ROUND_SAE:
	      fputs ("{rn-sae}", file);
	    case ROUND_NEG_INF | ROUND_SAE:
	      fputs ("{rd-sae}", file);
	    case ROUND_POS_INF | ROUND_SAE:
	      fputs ("{ru-sae}", file);
	    case ROUND_ZERO | ROUND_SAE:
	      fputs ("{rz-sae}", file);
	      output_operand_lossage ("operand is not a specific integer, "
				      "invalid operand code 'R'");
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    fputs (", ", file);

	  if (ASSEMBLER_DIALECT == ASM_ATT)

	    const char *name = get_some_local_dynamic_name ();
	      output_operand_lossage ("'%%&' used without any "
				      "local dynamic TLS references");
	    assemble_name (file, name);
	      || optimize_function_for_size_p (cfun)
	      || !TARGET_BRANCH_PREDICTION_HINTS)

	  x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	      int pred_val = profile_probability::from_reg_br_prob_note
			       (XINT (x, 0)).to_reg_br_prob_base ();

	      if (pred_val < REG_BR_PROB_BASE * 45 / 100
		  || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  bool taken = pred_val > REG_BR_PROB_BASE / 2;
		    = final_forward_branch_p (current_output_insn) == 0;

		  /* Emit hints only in the case default branch prediction
		     heuristics would fail.  */
		  if (taken != cputaken)
		      /* We use 3e (DS) prefix for taken branches and
			 2e (CS) prefix for not taken branches.  */
			fputs ("ds ; ", file);
			fputs ("cs ; ", file);

#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX

	  putc (TARGET_AVX2 ? 'i' : 'f', file);

	  /* NB: 32-bit indices in VSIB address are sign-extended
	     to 64 bits.  In x32, if the 32-bit address 0xf7fa3010 is
	     sign-extended to 0xfffffffff7fa3010, it becomes an invalid
	     address.  Add the addr32 prefix if there is no base
	     register nor symbol.  */
	      struct ix86_address parts;
	      ok = ix86_decompose_address (x, &parts);
	      gcc_assert (ok && parts.index == NULL_RTX);
	      if (parts.base == NULL_RTX
		  && (parts.disp == NULL_RTX
		      || !symbolic_operand (parts.disp,
					    GET_MODE (parts.disp))))
		fputs ("addr32 ", file);

	  if (TARGET_64BIT && Pmode != word_mode)
	    fputs ("addr32 ", file);

	  if (ix86_notrack_prefixed_insn_p (current_output_insn))
	    fputs ("notrack ", file);

	  output_operand_lossage ("invalid operand code '%c'", code);
    print_reg (x, code, file);

  else if (MEM_P (x))
      rtx addr = XEXP (x, 0);

      /* No `byte ptr' prefix for call instructions ... */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
	  machine_mode mode = GET_MODE (x);

	  /* Check for explicit size override codes.  */
	  else if (code == 'w')
	  else if (code == 'k')
	  else if (code == 'q')
	  else if (code == 'x')
	  else if (code == 't')
	  else if (code == 'g')
	  else if (mode == BLKmode)
	    /* ... or BLKmode operands, when not overridden.  */
	      switch (GET_MODE_SIZE (mode))
		case 1: size = "BYTE"; break;
		case 2: size = "WORD"; break;
		case 4: size = "DWORD"; break;
		case 8: size = "QWORD"; break;
		case 12: size = "TBYTE"; break;
		  if (mode == XFmode)
		case 32: size = "YMMWORD"; break;
		case 64: size = "ZMMWORD"; break;
		  gcc_unreachable ();
	      fputs (size, file);
	      fputs (" PTR ", file);

      if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
	output_operand_lossage ("invalid constraints for operand");
	ix86_print_operand_address_as
	  (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');

  else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
      long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
			       REAL_MODE_FORMAT (HFmode));
      if (ASSEMBLER_DIALECT == ASM_ATT)
      fprintf (file, "0x%04x", (unsigned int) l);

  else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
      REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	/* Sign extend 32bit SFmode immediate to 8 bytes.  */
	fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
		 (unsigned long long) (int) l);
	fprintf (file, "0x%08x", (unsigned int) l);

  else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
      REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
      fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);

  /* These float cases don't actually occur as immediate operands.  */
  else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fputs (dstr, file);

  /* Print bcst_mem_operand.  */
  else if (GET_CODE (x) == VEC_DUPLICATE)
      machine_mode vmode = GET_MODE (x);
      /* Must be bcst_memory_operand.  */
      gcc_assert (bcst_mem_operand (x, vmode));

      rtx mem = XEXP (x, 0);
      ix86_print_operand (file, mem, 0);

	  fputs ("{1to2}", file);
	  fputs ("{1to4}", file);
	  fputs ("{1to8}", file);
	  fputs ("{1to16}", file);
	  fputs ("{1to32}", file);
	  gcc_unreachable ();

      /* We have patterns that allow zero sets of memory, for instance.
	 In 64-bit mode, we should probably support all 8-byte vectors,
	 since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
	  if (x != CONST0_RTX (GET_MODE (x)))
	    output_operand_lossage ("invalid vector immediate");

      if (ix86_force_load_from_GOT_p (x, true))
	  /* For inline assembly statement, load function address
	     from GOT with 'P' operand modifier to avoid PLT.  */
	  x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
	  x = gen_rtx_CONST (Pmode, x);
	  x = gen_const_mem (Pmode, x);
	  ix86_print_operand (file, x, 'A');

      else if (code != 'p')
	  if (CONST_INT_P (x))
	      if (ASSEMBLER_DIALECT == ASM_ATT)
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		fputs ("OFFSET FLAT:", file);

      if (CONST_INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic || MACHOPIC_INDIRECT)
	output_pic_addr_const (file, x, code);
	output_addr_const (file, x);
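/* Usage sketch: in an instruction template, "%k1" prints the SImode name
   of operand 1 (e.g. %ecx) and "%b0" the QImode name of operand 0
   (e.g. %al), per the modifier table above; the punctuation modifiers
   such as '^' and '!' below consume no operand at all.  */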
ix86_print_operand_punct_valid_p (unsigned char code)
  return (code == '*' || code == '+' || code == '&' || code == ';'
	  || code == '~' || code == '^' || code == '!');
/* Print a memory operand whose address is ADDR.  */

ix86_print_operand_address_as (FILE *file, rtx addr,
			       addr_space_t as, bool raw)
  struct ix86_address parts;
  rtx base, index, disp;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
      addr = XVECEXP (addr, 0, 0);
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
      gcc_assert (TARGET_64BIT);
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
    ok = ix86_decompose_address (addr, &parts);

  index = parts.index;
  scale = parts.scale;

  if (ADDR_SPACE_GENERIC_P (as))
    gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));

  if (!ADDR_SPACE_GENERIC_P (as) && !raw)
      if (ASSEMBLER_DIALECT == ASM_ATT)
	case ADDR_SPACE_SEG_FS:
	  fputs ("fs:", file);
	case ADDR_SPACE_SEG_GS:
	  fputs ("gs:", file);
	  gcc_unreachable ();
  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index && !raw)
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
	symbol = XEXP (XEXP (disp, 0), 0);

      if (GET_CODE (symbol) == LABEL_REF
	  || (GET_CODE (symbol) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))

  if (!base && !index)
      /* Displacement only requires special attention.  */
      if (CONST_INT_P (disp))
	  if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
	    fputs ("ds:", file);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
      /* Load the external function address via the GOT slot to avoid PLT.  */
      else if (GET_CODE (disp) == CONST
	       && GET_CODE (XEXP (disp, 0)) == UNSPEC
	       && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
		   || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
	       && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	output_pic_addr_const (file, disp, 0);
	output_pic_addr_const (file, disp, 0);
	output_addr_const (file, disp);

      /* Print SImode register names to force addr32 prefix.  */
      if (SImode_address_operand (addr, VOIDmode))
	    gcc_assert (TARGET_64BIT);
	    switch (GET_CODE (addr))
		gcc_assert (GET_MODE (addr) == SImode);
		gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
		gcc_assert (GET_MODE (addr) == DImode);
		gcc_unreachable ();
	  gcc_assert (!code);

	       && CONST_INT_P (disp)
	       && INTVAL (disp) < -16*1024*1024)
	/* X32 runs in 64-bit mode, where a displacement, DISP, in
	   address DISP(%r64) is encoded as a 32-bit immediate sign-
	   extended from 32-bit to 64-bit.  For -0x40000300(%r64), the
	   address is %r64 + 0xffffffffbffffd00.  When %r64 <
	   0x40000300, like 0x37ffe064, the address is 0xfffffffff7ffdd64,
	   which is invalid for x32.  The correct address is %r64
	   - 0x40000300 == 0xf7ffdd64.  To properly encode
	   -0x40000300(%r64) for x32, we zero-extend the negative
	   displacement by forcing the addr32 prefix, which truncates
	   0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
	   zero-extend all negative displacements, including -1(%rsp).
	   However, for small negative displacements, sign-extension
	   won't cause overflow.  We only zero-extend negative
	   displacements if they are < -16*1024*1024, which is also used
	   to check legitimate address displacements for PIC.  */

      /* Since the upper 32 bits of RSP are always zero for x32,
	 we can encode %esp as %rsp to avoid 0x67 prefix if
	 there is no index register.  */
      if (TARGET_X32 && Pmode == SImode
	  && !index && base && REG_P (base) && REGNO (base) == SP_REG)

      if (ASSEMBLER_DIALECT == ASM_ATT)
	    output_pic_addr_const (file, disp, 0);
	  else if (GET_CODE (disp) == LABEL_REF)
	    output_asm_label (disp);
	    output_addr_const (file, disp);

	    print_reg (base, code, file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, ",%d", scale);

	  rtx offset = NULL_RTX;

	  /* Pull out the offset of a symbol; print any symbol itself.  */
	  if (GET_CODE (disp) == CONST
	      && GET_CODE (XEXP (disp, 0)) == PLUS
	      && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
	      offset = XEXP (XEXP (disp, 0), 1);
	      disp = gen_rtx_CONST (VOIDmode,
				    XEXP (XEXP (disp, 0), 0));

	    output_pic_addr_const (file, disp, 0);
	  else if (GET_CODE (disp) == LABEL_REF)
	    output_asm_label (disp);
	  else if (CONST_INT_P (disp))
	    output_addr_const (file, disp);

	      print_reg (base, code, file);
	      if (INTVAL (offset) >= 0)
	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, "*%d", scale);
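/* For illustration (register choice hypothetical): the same decomposed
   address with base=%rbp, index=%rcx, scale=4 and disp=-4 is printed as
   "-4(%rbp,%rcx,4)" in AT&T syntax by the ",%d" branch above, and as
   "[rbp+rcx*4-4]" in Intel syntax by the "*%d" branch.  */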
ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
  if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
    output_operand_lossage ("invalid constraints for operand");
    ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

i386_asm_output_addr_const_extra (FILE *file, rtx x)
  if (GET_CODE (x) != UNSPEC)

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    case UNSPEC_GOTOFF:
      output_addr_const (file, op);
      fputs ("@gotoff", file);
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@gottpoff", file);
      output_addr_const (file, op);
      fputs ("@tpoff", file);
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
	fputs ("@tpoff", file);
	fputs ("@ntpoff", file);
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@dtpoff", file);
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
	       "@gottpoff(%rip)" : "@gottpoff[rip]", file);
	fputs ("@gotntpoff", file);
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@indntpoff", file);
    case UNSPEC_MACHOPIC_OFFSET:
      output_addr_const (file, op);
      machopic_output_function_base_name (file);
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1

output_387_binary_op (rtx_insn *insn, rtx *operands)
  static char buf[40];
    = (SSE_REG_P (operands[0])
       || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
  else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	   || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)

  switch (GET_CODE (operands[3]))
      gcc_unreachable ();

      p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd";
	p = "\t{%2, %1, %0|%0, %1, %2}";
	p = "\t{%2, %0|%0, %2}";

  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
      if (STACK_REG_P (operands[0])
	  && ((REG_P (operands[1])
	       && REGNO (operands[0]) == REGNO (operands[1])
	       && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
	      || (REG_P (operands[2])
		  && REGNO (operands[0]) == REGNO (operands[2])
		  && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
	  && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
	gcc_unreachable ();

  switch (GET_CODE (operands[3]))
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	std::swap (operands[1], operands[2]);

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */

      if (MEM_P (operands[1]))
      if (MEM_P (operands[2]))
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	    p = "{rp\t%2, %0|p\t%0, %2}";
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	    p = "{p\t%1, %0|rp\t%0, %1}";
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */

      if (STACK_TOP_P (operands[0]))
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
      else if (STACK_TOP_P (operands[1]))
	    p = "{\t%1, %0|r\t%0, %1}";
	    p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
	    p = "{r\t%2, %0|\t%0, %2}";
	    p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */

      gcc_unreachable ();
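/* Rough example of the result (register allocation hypothetical): for an
   SSE SFmode add, BUF ends up holding something like
   "addss\t{%2, %0|%0, %2}" -- or the three-operand AVX template above --
   and final instruction output expands it to e.g. "addss %xmm1, %xmm0"
   in AT&T syntax.  */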
/* Return needed mode for entity in optimize_mode_switching pass.  */

ix86_dirflag_mode_needed (rtx_insn *insn)
      if (cfun->machine->func_type == TYPE_NORMAL)
	return X86_DIRFLAG_ANY;
      /* No need to emit CLD in interrupt handler for TARGET_CLD.  */
      return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;

  if (recog_memoized (insn) < 0)
    return X86_DIRFLAG_ANY;

  if (get_attr_type (insn) == TYPE_STR)
      /* Emit cld instruction if stringops are used in the function.  */
      if (cfun->machine->func_type == TYPE_NORMAL)
	return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
	return X86_DIRFLAG_RESET;

  return X86_DIRFLAG_ANY;
/* Check if a 256bit or 512bit AVX register is referenced inside of EXP.  */

ix86_check_avx_upper_register (const_rtx exp)
  return (SSE_REG_P (exp)
	  && !EXT_REX_SSE_REG_P (exp)
	  && GET_MODE_BITSIZE (GET_MODE (exp)) > 128);

/* Check if a 256bit or 512bit AVX register is referenced in stores.  */

ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
  if (ix86_check_avx_upper_register (dest))
      bool *used = (bool *) data;
/* Return needed mode for entity in optimize_mode_switching pass.  */

ix86_avx_u128_mode_needed (rtx_insn *insn)
  if (DEBUG_INSN_P (insn))
    return AVX_U128_ANY;

      /* Needed mode is set to AVX_U128_CLEAN if there are
	 no 256bit or 512bit modes used in function arguments.  */
      for (link = CALL_INSN_FUNCTION_USAGE (insn);
	   link = XEXP (link, 1))
	  if (GET_CODE (XEXP (link, 0)) == USE)
	      rtx arg = XEXP (XEXP (link, 0), 0);

	      if (ix86_check_avx_upper_register (arg))
		return AVX_U128_DIRTY;

      /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit
	 nor 512bit registers used in the function return register.  */
      bool avx_upper_reg_found = false;
      note_stores (insn, ix86_check_avx_upper_stores,
		   &avx_upper_reg_found);
      if (avx_upper_reg_found)
	return AVX_U128_DIRTY;

      /* If the function is known to preserve some SSE registers,
	 RA and previous passes can legitimately rely on that for
	 modes wider than 256 bits.  It's only safe to issue a
	 vzeroupper if all SSE registers are clobbered.  */
      const function_abi &abi = insn_callee_abi (insn);
      if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
	  /* It should be safe to issue a vzeroupper before a sibling
	     call.  Also, there is no mode_exit for a sibling call, so
	     the vzeroupper for it could otherwise be missing.  */
	  || !(SIBLING_CALL_P (insn)
	       || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
					 abi.mode_clobbers (V4DImode))))
	return AVX_U128_ANY;

      return AVX_U128_CLEAN;

  subrtx_iterator::array_type array;

  rtx set = single_set (insn);
      rtx dest = SET_DEST (set);
      rtx src = SET_SRC (set);
      if (ix86_check_avx_upper_register (dest))
	  /* This is an YMM/ZMM load.  Return AVX_U128_DIRTY if the
	     source isn't zero.  */
	  if (standard_sse_constant_p (src, GET_MODE (dest)) != 1)
	    return AVX_U128_DIRTY;
	    return AVX_U128_ANY;

	  FOR_EACH_SUBRTX (iter, array, src, NONCONST)
	    if (ix86_check_avx_upper_register (*iter))
	      return AVX_U128_DIRTY;

      /* This isn't YMM/ZMM load/store.  */
      return AVX_U128_ANY;

  /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
     Hardware changes state only when a 256bit register is written to,
     but we need to prevent the compiler from moving optimal insertion
     point above an eventual read from a 256bit or 512bit register.  */
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
    if (ix86_check_avx_upper_register (*iter))
      return AVX_U128_DIRTY;

  return AVX_U128_ANY;
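/* Background note: on several microarchitectures, executing legacy SSE
   instructions while the upper halves of the YMM/ZMM registers are
   "dirty" incurs a state-transition penalty; tracking CLEAN/DIRTY here
   lets the mode-switching pass place vzeroupper exactly where such a
   transition could otherwise occur.  */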
/* Return mode that i387 must be switched into
   prior to the execution of insn.  */

ix86_i387_mode_needed (int entity, rtx_insn *insn)
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specifies that the
     function has no requirements on the control word and makes no
     changes in the bits we are interested in.  */
      || (NONJUMP_INSN_P (insn)
	  && (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

    case I387_ROUNDEVEN:
      if (mode == I387_CW_ROUNDEVEN)
      if (mode == I387_CW_TRUNC)
      if (mode == I387_CW_FLOOR)
      if (mode == I387_CW_CEIL)
      gcc_unreachable ();

  return I387_CW_ANY;
/* Return mode that entity must be switched into
   prior to the execution of insn.  */

ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
      return ix86_dirflag_mode_needed (insn);
      return ix86_avx_u128_mode_needed (insn);
    case I387_ROUNDEVEN:
      return ix86_i387_mode_needed (entity, insn);
      gcc_unreachable ();
/* Calculate mode of upper 128bit AVX registers after the insn.  */

ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
  rtx pat = PATTERN (insn);

  if (vzeroupper_pattern (pat, VOIDmode)
      || vzeroall_pattern (pat, VOIDmode))
    return AVX_U128_CLEAN;

  /* We know that state is clean after CALL insn if there are no
     256bit or 512bit registers used in the function return register.  */
      bool avx_upper_reg_found = false;
      note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);

      if (avx_upper_reg_found)
	return AVX_U128_DIRTY;

      /* If the function doesn't clobber any SSE registers, or clobbers
	 only their 128-bit parts, then a vzeroupper isn't issued before
	 the function exit, and the status is ANY rather than CLEAN after
	 the function.  */
      const function_abi &abi = insn_callee_abi (insn);
      if (!(SIBLING_CALL_P (insn)
	    || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
				      abi.mode_clobbers (V4DImode))))
	return AVX_U128_ANY;

      return AVX_U128_CLEAN;

  /* Otherwise, return current mode.  Remember that if insn
     references AVX 256bit or 512bit registers, the mode was already
     changed to DIRTY from MODE_NEEDED.  */
/* Return the mode that an insn results in.  */

ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
      return ix86_avx_u128_mode_after (mode, insn);
    case I387_ROUNDEVEN:
      gcc_unreachable ();
ix86_dirflag_mode_entry (void)
  /* For TARGET_CLD or in the interrupt handler we can't assume
     direction flag state at function entry.  */
      || cfun->machine->func_type != TYPE_NORMAL)
    return X86_DIRFLAG_ANY;

  return X86_DIRFLAG_RESET;

ix86_avx_u128_mode_entry (void)
  /* Entry mode is set to AVX_U128_DIRTY if there are
     256bit or 512bit modes used in function arguments.  */
  for (arg = DECL_ARGUMENTS (current_function_decl); arg;
       arg = TREE_CHAIN (arg))
      rtx incoming = DECL_INCOMING_RTL (arg);

      if (incoming && ix86_check_avx_upper_register (incoming))
	return AVX_U128_DIRTY;

  return AVX_U128_CLEAN;
/* Return a mode that ENTITY is assumed to be
   switched to at function entry.  */

ix86_mode_entry (int entity)
      return ix86_dirflag_mode_entry ();
      return ix86_avx_u128_mode_entry ();
    case I387_ROUNDEVEN:
      return I387_CW_ANY;
      gcc_unreachable ();

ix86_avx_u128_mode_exit (void)
  rtx reg = crtl->return_rtx;

  /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
     or 512bit modes used in the function return register.  */
  if (reg && ix86_check_avx_upper_register (reg))
    return AVX_U128_DIRTY;

  /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
     modes used in function arguments, otherwise return AVX_U128_CLEAN.  */
  return ix86_avx_u128_mode_entry ();

/* Return a mode that ENTITY is assumed to be
   switched to at function exit.  */

ix86_mode_exit (int entity)
      return X86_DIRFLAG_ANY;
      return ix86_avx_u128_mode_exit ();
    case I387_ROUNDEVEN:
      return I387_CW_ANY;
      gcc_unreachable ();
ix86_mode_priority (int, int n)
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */

emit_i387_cw_initialization (int mode)
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

    case I387_CW_ROUNDEVEN:
      /* round to nearest */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      slot = SLOT_CW_ROUNDEVEN;

    case I387_CW_TRUNC:
      /* round toward zero (truncate) */
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
      slot = SLOT_CW_TRUNC;

    case I387_CW_FLOOR:
      /* round down toward -oo */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
      slot = SLOT_CW_FLOOR;

      /* round up toward +oo */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
      slot = SLOT_CW_CEIL;

      gcc_unreachable ();

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
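/* Reference for the masks above: bits 11:10 of the x87 control word hold
   the rounding-control field, so (cw & ~0x0c00) selects 00 = round to
   nearest even, |0x0400 gives 01 = round down, |0x0800 gives 10 = round
   up, and |0x0c00 gives 11 = truncate toward zero.  */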
/* Generate one or more insns to set ENTITY to MODE.  */

ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
		    HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
      if (mode == X86_DIRFLAG_RESET)
	emit_insn (gen_cld ());
      if (mode == AVX_U128_CLEAN)
	ix86_expand_avx_vzeroupper ();
    case I387_ROUNDEVEN:
      if (mode != I387_CW_ANY
	  && mode != I387_CW_UNINITIALIZED)
	emit_i387_cw_initialization (mode);
      gcc_unreachable ();
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
  bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
  bool dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  static char buf[40];

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

    return "fisttp%Z0\t%0";

  strcpy (buf, "fist");

  if (round_mode != I387_CW_ANY)
    output_asm_insn ("fldcw\t%3", operands);

  strcat (buf, p + !(stack_top_dies || dimode_p));

  output_asm_insn (buf, operands);

  if (round_mode != I387_CW_ANY)
    output_asm_insn ("fldcw\t%2", operands);
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";

      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (STACK_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);

  return opno ? "fstp\t%y1" : "fstp\t%y0";
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

output_fp_compare (rtx_insn *insn, rtx *operands,
		   bool eflags_p, bool unordered_p)
  rtx *xops = eflags_p ? &operands[0] : &operands[1];
  bool stack_top_dies;

  static char buf[40];

  gcc_assert (STACK_TOP_P (xops[0]));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);

      p = unordered_p ? "fucomi" : "fcomi";
      p = "p\t{%y1, %0|%0, %y1}";
      strcat (buf, p + !stack_top_dies);

  if (STACK_REG_P (xops[1])
      && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
      gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);

      /* If the top of the 387 stack dies, and the other operand is
	 also a stack register that dies, then this must be a
	 `fcompp' float compare.  */
      p = unordered_p ? "fucompp" : "fcompp";
  else if (const0_operand (xops[1], VOIDmode))
      gcc_assert (!unordered_p);
      strcpy (buf, "ftst");
      if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
	  gcc_assert (!unordered_p);
	p = unordered_p ? "fucom" : "fcom";

      strcat (buf, p + !stack_top_dies);

  output_asm_insn (buf, operands);
  return "fnstsw\t%0";
ix86_output_addr_vec_elt (FILE *file, int value)
  const char *directive = ASM_LONG;

      directive = ASM_QUAD;
      gcc_assert (!TARGET_64BIT);

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
  const char *directive = ASM_LONG;

  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
    gcc_assert (!TARGET_64BIT);

  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
	     directive, LPREFIX, value, LPREFIX, rel);
  else if (TARGET_MACHO)
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
		 GOT_SYMBOL_NAME, LPREFIX, value);
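/* Example of the emitted table entries (label numbers hypothetical, the
   exact spelling depends on LPREFIX): ".long .L5-.L2" for the
   64-bit/VxWorks branch above, or ".long .L5@GOTOFF" when the assembler
   supports @GOTOFF in data.  */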
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)

/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to next cycle if there is some dependency.  */

static unsigned int
increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
  if (!prev || !next)
    return distance + (distance & 1) + 2;

  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  FOR_EACH_INSN_USE (use, next)
    FOR_EACH_INSN_DEF (def, prev)
      if (!DF_REF_IS_ARTIFICIAL (def)
	  && DF_REF_REGNO (use) == DF_REF_REGNO (def))
	return distance + (distance & 1) + 2;

  return distance + 1;
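/* Worked example: DISTANCE counts half-cycles.  With no dependency the
   cost grows by one half-cycle (distance + 1).  With a dependency, a
   distance of 3 becomes 3 + (3 & 1) + 2 = 6, i.e. the count is rounded
   up to a full-cycle boundary and one extra cycle is added.  */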
/* Function checks if instruction INSN defines register number
   REGNO1 or REGNO2.  */

insn_defines_reg (unsigned int regno1, unsigned int regno2,
  FOR_EACH_INSN_DEF (def, insn)
    if (DF_REF_REG_DEF_P (def)
	&& !DF_REF_IS_ARTIFICIAL (def)
	&& (regno1 == DF_REF_REGNO (def)
	    || regno2 == DF_REF_REGNO (def)))

/* Function checks if instruction INSN uses register number
   REGNO as a part of address expression.  */

insn_uses_reg_mem (unsigned int regno, rtx insn)
  FOR_EACH_INSN_USE (use, insn)
    if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
			       rtx_insn *insn, int distance,
			       rtx_insn *start, bool *found)
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx_insn *prev = start;
  rtx_insn *next = NULL;

	 && distance < LEA_SEARCH_THRESHOLD)
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
	  distance = increase_distance (prev, next, distance);
	  if (insn_defines_reg (regno1, regno2, prev))
	      if (recog_memoized (prev) < 0
		  || get_attr_type (prev) != TYPE_LEA)

      if (prev == BB_HEAD (bb))

      prev = PREV_INSN (prev);

/* Search backward for a non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbor BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If there is no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
			 rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
					      distance, PREV_INSN (insn),
					      &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_non_agu_define_in_bb (regno1, regno2,
						  insn, distance,
						  BB_END (bb), &found);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      int bb_dist
		= distance_non_agu_define_in_bb (regno1, regno2,
						 insn, distance,
						 BB_END (e->src),
						 &found_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  if (!found)
    return -1;

  return distance >> 1;
}
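
/* A worked example (hypothetical): if the nearest non-AGU definition of
   REGNO1 or REGNO2 sits 5 half-cycles above INSN, this returns
   5 >> 1 == 2 full cycles; if nothing is found within
   LEA_SEARCH_THRESHOLD half-cycles in this block or its predecessors,
   it returns -1.  */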

/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in a memory address, added
   to DISTANCE.  Return -1 if REGNO0 is set.

   Put true into *FOUND if a register usage was found and
   false otherwise.
   Put true into *REDEFINED if a register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
			rtx_insn *insn, int distance, rtx_insn *start,
			bool *found, bool *redefined)
{
  basic_block bb = NULL;
  rtx_insn *next = start;
  rtx_insn *prev = NULL;

  *found = false;
  *redefined = false;

  if (start != NULL_RTX)
    {
      bb = BLOCK_FOR_INSN (start);
      if (start != BB_HEAD (bb))
	/* If insn and start belong to the same bb, set prev to insn,
	   so the call to increase_distance will increase the distance
	   between insns by 1.  */
	prev = insn;
    }

  while (next
	 && next != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
	{
	  distance = increase_distance (prev, next, distance);
	  if (insn_uses_reg_mem (regno, next))
	    {
	      /* Return DISTANCE if OP0 is used in memory
		 address in NEXT.  */
	      *found = true;
	      return distance;
	    }

	  if (insn_defines_reg (regno, INVALID_REGNUM, next))
	    {
	      /* Return -1 if OP0 is set in NEXT.  */
	      *redefined = true;
	      return -1;
	    }

	  prev = next;
	}

      if (next == BB_END (bb))
	break;

      next = NEXT_INSN (next);
    }

  return distance;
}

/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in a memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
				       NEXT_INSN (insn),
				       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_agu_use_in_bb (regno0, insn,
					   distance, BB_HEAD (bb),
					   &found, &redefined);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;
	  bool redefined_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      int bb_dist
		= distance_agu_use_in_bb (regno0, insn,
					  distance, BB_HEAD (e->dest),
					  &found_in_bb, &redefined_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  if (!found || redefined)
    return -1;

  return distance >> 1;
}

/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is more preferred than LEA
   Zero: Neutral
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0

/* Return true if usage of lea INSN has a performance advantage
   over a sequence of instructions.  The instruction sequence has
   SPLIT_COST cycles higher latency than the lea latency.  */

static bool
ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
		      unsigned int regno2, int split_cost, bool has_scale)
{
  int dist_define, dist_use;

  /* For Atom processors newer than Bonnell, if using a 2-source or
     3-source LEA for non-destructive destination purposes, or due to
     wanting ability to use SCALE, the use of LEA is justified.  */
  if (!TARGET_CPU_P (BONNELL))
    {
      if (has_scale)
	return true;
      if (split_cost < 1)
	return false;
      if (regno0 == regno1 || regno0 == regno2)
	return false;
      return true;
    }

  /* Remember recog_data content.  */
  struct recog_data_d recog_data_save = recog_data;

  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  /* distance_non_agu_define can call get_attr_type which can call
     recog_memoized, restore recog_data back to previous content.  */
  recog_data = recog_data_save;

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non-AGU operand definition, no AGU
	 operand usage and split cost is 0 then both lea
	 and non-lea variants have the same priority.  Currently
	 we prefer lea for 64-bit code and non-lea on 32-bit
	 code.  */
      if (dist_use < 0 && split_cost == 0)
	return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
	return true;
    }

  /* With a longer definition distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in a memory address then we just check
     that split cost exceeds AGU stall.  */
  if (dist_use < 0)
    return dist_define > LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with the short distance takes effect.  */
  return dist_define >= dist_use;
}
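
/* An illustrative decision on Bonnell (numbers hypothetical): with
   dist_define == 1 and split_cost == 1, the adjusted distance is
   1 + 1 + IX86_LEA_PRIORITY == 2; if the result feeds an address at
   dist_use == 2, then 2 >= 2 holds and the lea is kept, while
   dist_use == 3 would favor splitting it.  */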

/* Return true if we need to split op0 = op1 + op2 into a sequence of
   move and add to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);
  regno2 = true_regnum (operands[2]);

  /* We need to split only adds with a non-destructive
     destination operand.  */
  if (regno0 == regno1 || regno0 == regno2)
    return false;
  else
    return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
}

/* Return true if we should emit a lea instruction instead of a mov
   instruction.  */

bool
ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Use lea for reg to reg moves only.  */
  if (!REG_P (operands[0]) || !REG_P (operands[1]))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);

  return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
}

/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls during peephole2.  */

bool
ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;
  int split_cost;
  struct ix86_address parts;
  int ok;

  /* The "at least two components" test below might not catch simple
     move or zero extension insns if parts.base is non-NULL and parts.disp
     is const0_rtx as the only components in the address, e.g. if the
     register is %rbp or %r13.  As this test is much cheaper and moves or
     zero extensions are the common case, do this check first.  */
  if (REG_P (operands[1])
      || (SImode_address_operand (operands[1], VOIDmode)
	  && REG_P (XEXP (operands[1], 0))))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* There should be at least two components in the address.  */
  if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
      + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
    return false;

  /* We should not split into add if a non-legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  /* Use add for a = a + b and a = b + a since it is faster and shorter
     than lea for most processors.  For the processors like BONNELL, if
     the destination register of LEA holds an actual address which will
     be used soon, LEA is better and otherwise ADD is better.  */
  if (!TARGET_CPU_P (BONNELL)
      && parts.scale == 1
      && (!parts.disp || parts.disp == const0_rtx)
      && (regno0 == regno1 || regno0 == regno2))
    return true;

  /* Split with -Oz if the encoding requires fewer bytes.  */
  if (optimize_size > 1
      && parts.scale > 1
      && !parts.base
      && (!parts.disp || parts.disp == const0_rtx))
    return true;

  /* Check we need to optimize.  */
  if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
    return false;

  split_cost = 0;

  /* Compute how many cycles we will add to execution time
     if we split lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use a mov instruction if the non-destructive
	 destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
	split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
	split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
	{
	  if (regno0 != regno1)
	    split_cost += 1;
	  else if (regno2 == regno0)
	    split_cost += 4;
	  else
	    split_cost += parts.scale;
	}
    }

  /* Have to use an add instruction with an immediate if
     disp is non-zero.  */
  if (parts.disp && parts.disp != const0_rtx)
    split_cost += 1;

  /* Subtract the price of lea.  */
  split_cost -= 1;

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
				parts.scale > 1);
}
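
/* One possible split this cost model describes (operands are
   hypothetical):

	lea	0x4(%rbx,%rcx,2), %rax

   may be replaced by

	mov	%rcx, %rax		mov: non-destructive destination
	add	%rax, %rax		shift/add: scale == 2
	add	%rbx, %rax		add: base plus index
	add	$0x4, %rax		add: non-zero displacement

   with one unit of split_cost per extra instruction and one unit
   subtracted for the lea being removed.  */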

/* Return true if it is ok to optimize an ADD operation to a LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like BONNELL, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */

bool
ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a!=b && a!=c), must use the lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
}

/* Return true if the destination reg of SET_BODY is a shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
	return false;
      break;
    case PARALLEL:
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
					  use_body))
	  return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (set_body,
					  XVECEXP (use_body, 0, i)))
	  return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
	  || GET_CODE (shift_rtx) == LSHIFTRT
	  || GET_CODE (shift_rtx) == ASHIFTRT
	  || GET_CODE (shift_rtx) == ROTATE
	  || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count))
	{
	  /* Add check since it can be invoked before register
	     allocation in pre-reload schedule.  */
	  if (reload_completed
	      && true_regnum (set_dest) == true_regnum (shift_count))
	    return true;
	  else if (REGNO (set_dest) == REGNO (shift_count))
	    return true;
	}
    }

  return false;
}

/* Return true if the destination reg of SET_INSN is a shift count of
   USE_INSN.  */

bool
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
{
  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
				       PATTERN (use_insn));
}

/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.  */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return false;
  return true;
}

/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  machine_mode scalar_mode;

  switch (mode)
    {
    case E_V64QImode:
    case E_V32QImode:
    case E_V16QImode:
    case E_V32HImode:
    case E_V16HImode:
    case E_V8HImode:
    case E_V16SImode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
      gcc_assert (vect);
      /* FALLTHRU */
    case E_V32HFmode:
    case E_V16HFmode:
    case E_V8HFmode:
    case E_V16SFmode:
    case E_V8SFmode:
    case E_V4SFmode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}

/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
{
  machine_mode vec_mode, imode;
  wide_int w;
  rtx mask, v;

  switch (mode)
    {
    case E_V32HFmode:
    case E_V16HFmode:
    case E_V8HFmode:
      vec_mode = mode;
      imode = HImode;
      break;

    case E_V16SFmode:
    case E_V8SFmode:
    case E_V4SFmode:
      vec_mode = mode;
      imode = SImode;
      break;

    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
      vec_mode = mode;
      imode = DImode;
      break;

    case E_HFmode:
      vec_mode = VOIDmode;
      imode = HImode;
      break;

    case E_SFmode:
      vec_mode = VOIDmode;
      imode = SImode;
      break;

    case E_DFmode:
      vec_mode = VOIDmode;
      imode = DImode;
      break;

    default:
      gcc_unreachable ();
    }

  machine_mode inner_mode = GET_MODE_INNER (mode);
  w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
			   GET_MODE_BITSIZE (inner_mode));
  if (invert)
    w = wi::bit_not (w);

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_wide_int_const (w, imode);
  mask = gen_lowpart (inner_mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (inner_mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
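
/* Worked example: for SFmode the sign bit is bit 31, so the mask is
   0x80000000, or 0x7fffffff when INVERT is true; for V4SFmode with
   VECT set the same 32-bit pattern is replicated into all four
   elements.  */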

/* Return a HOST_WIDE_INT for const vector OP in MODE.  */

HOST_WIDE_INT
ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    gcc_unreachable ();

  int nunits = GET_MODE_NUNITS (mode);
  wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
  machine_mode innermode = GET_MODE_INNER (mode);
  unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);

  switch (mode)
    {
    case E_V2QImode:
    case E_V4QImode:
    case E_V8QImode:
    case E_V2HImode:
    case E_V4HImode:
    case E_V2SImode:
      for (int i = 0; i < nunits; ++i)
	{
	  int v = INTVAL (XVECEXP (op, 0, i));
	  wide_int wv = wi::shwi (v, innermode_bits);
	  val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
	}
      break;
    case E_V2HFmode:
    case E_V4HFmode:
    case E_V2SFmode:
      for (int i = 0; i < nunits; ++i)
	{
	  rtx x = XVECEXP (op, 0, i);
	  int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
				  REAL_MODE_FORMAT (innermode));
	  wide_int wv = wi::shwi (v, innermode_bits);
	  val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
	}
      break;
    default:
      gcc_unreachable ();
    }

  return val.to_shwi ();
}
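
/* Worked example: a V4QImode vector (1, 2, 3, 4) inserts each 8-bit
   element at bit offset 8 * i, producing the HOST_WIDE_INT
   0x04030201.  */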

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, machine_mode req_mode)
{
  rtx set;
  machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case E_CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return false;
      break;
    case E_CCmode:
      if (req_mode == CCGCmode)
	return false;
      /* FALLTHRU */
    case E_CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return false;
      /* FALLTHRU */
    case E_CCGOCmode:
      if (req_mode == CCZmode)
	return false;
      /* FALLTHRU */
    case E_CCZmode:
      break;

    default:
      if (set_mode != req_mode)
	return false;
      break;
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}

/* Return the mode to use for a comparison of operands OP0 and OP1
   with code CODE.  */

machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return CCFPmode;
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      rtx geu;
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
	  && (rtx_equal_p (op1, XEXP (op0, 0))
	      || rtx_equal_p (op1, XEXP (op0, 1))))
	return CCCmode;
      /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
	 Match LTU of op0
	 (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
	 and op1
	 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
	 where CC_CCC is either CC or CCC.  */
      else if (code == LTU
	       && GET_CODE (op0) == NEG
	       && GET_CODE (geu = XEXP (op0, 0)) == GEU
	       && REG_P (XEXP (geu, 0))
	       && (GET_MODE (XEXP (geu, 0)) == CCCmode
		   || GET_MODE (XEXP (geu, 0)) == CCmode)
	       && REGNO (XEXP (geu, 0)) == FLAGS_REG
	       && XEXP (geu, 1) == const0_rtx
	       && GET_CODE (op1) == LTU
	       && REG_P (XEXP (op1, 0))
	       && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
	       && REGNO (XEXP (op1, 0)) == FLAGS_REG
	       && XEXP (op1, 1) == const0_rtx)
	return CCCmode;
      /* Similarly for the *x86_cmc pattern.
	 Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
	 and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
	 It is sufficient to test that the operand modes are CCCmode.  */
      else if (code == LTU
	       && GET_CODE (op0) == NEG
	       && GET_CODE (XEXP (op0, 0)) == LTU
	       && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
	       && GET_CODE (op1) == GEU
	       && GET_MODE (XEXP (op1, 0)) == CCCmode)
	return CCCmode;
      else
	return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with the sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases the Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with the sign flag when comparing
	 against zero, but we miss the jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* CCmode should be used in all other cases.  */
    default:
      return CCmode;
    }
}
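
/* For illustration: "a == b" needs only ZF and gets CCZmode; a signed
   "a < 0" can be decided from the sign flag and gets CCGOCmode; the
   unsigned overflow idiom "a + b < a" needs only CF and gets CCCmode;
   everything else falls back to the fully general CCmode.  */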

/* Return TRUE or FALSE depending on whether the ptest instruction
   INSN has source and destination with suitable matching CC modes.  */

bool
ix86_match_ptest_ccmode (rtx insn)
{
  rtx set, src;
  machine_mode set_mode;

  set = PATTERN (insn);
  gcc_assert (GET_CODE (set) == SET);
  src = SET_SRC (set);
  gcc_assert (GET_CODE (src) == UNSPEC
	      && XINT (src, 1) == UNSPEC_PTEST);

  set_mode = GET_MODE (src);
  if (set_mode != CCZmode
      && set_mode != CCCmode
      && set_mode != CCmode)
    return false;
  return GET_MODE (SET_DEST (set)) == set_mode;
}

/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = INVALID_REGNUM;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  if ((m1 == CCNOmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCNOmode))
    return CCNOmode;

  if (m1 == CCZmode
      && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
    return m2;
  else if (m2 == CCZmode
	   && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case E_CCmode:
    case E_CCGCmode:
    case E_CCGOCmode:
    case E_CCNOmode:
    case E_CCAmode:
    case E_CCCmode:
    case E_CCOmode:
    case E_CCPmode:
    case E_CCSmode:
    case E_CCZmode:
    case E_CCFPmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}

/* Return the strategy to use for floating-point.  We assume that fcomi is
   always preferable where available, since that is also true when looking
   at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */

enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code)
{
  /* Do fcomi/sahf based test when profitable.  */

  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    return IX86_FPCMP_SAHF;

  return IX86_FPCMP_ARITH;
}

/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)

/* Zero extend possibly SImode EXP to Pmode register.  */

rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
}

/* Return true if the function is called via PLT.  */

bool
ix86_call_use_plt_p (rtx call_op)
{
  if (SYMBOL_REF_LOCAL_P (call_op))
    {
      if (SYMBOL_REF_DECL (call_op)
	  && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL)
	{
	  /* NB: All ifunc functions must be called via PLT.  */
	  cgraph_node *node
	    = cgraph_node::get (SYMBOL_REF_DECL (call_op));
	  if (node && node->ifunc_resolver)
	    return true;
	}
      return false;
    }
  return true;
}

/* Implement TARGET_IFUNC_REF_LOCAL_OK.  If this hook returns true,
   the PLT entry will be used as the function address for local IFUNC
   functions.  When the PIC register is needed for a PLT call, an indirect
   call via the PLT entry will fail since the PIC register may not be
   set up properly for the indirect call.  In this case, we should return
   false.  */

static bool
ix86_ifunc_ref_local_ok (void)
{
  return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC);
}

/* Return true if the function being called was marked with attribute
   "noplt" or using -fno-plt and we are compiling for non-PIC.  We need
   to handle the non-PIC case in the backend because there is no easy
   interface for the front-end to force non-PLT calls to use the GOT.
   This is currently used only with 64-bit or 32-bit GOT32X ELF targets
   to call the function marked "noplt" indirectly.  */

static bool
ix86_nopic_noplt_attribute_p (rtx call_op)
{
  if (flag_pic || ix86_cmodel == CM_LARGE
      || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
      || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
      || SYMBOL_REF_LOCAL_P (call_op))
    return false;

  tree symbol_decl = SYMBOL_REF_DECL (call_op);

  if (!flag_plt
      || (symbol_decl != NULL_TREE
	  && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
    return true;

  return false;
}

/* Helper to output the jmp/call.  */

static void
ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
{
  if (thunk_name != NULL)
    {
      if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
	  && ix86_indirect_branch_cs_prefix)
	fprintf (asm_out_file, "\tcs\n");
      fprintf (asm_out_file, "\tjmp\t");
      assemble_name (asm_out_file, thunk_name);
      putc ('\n', asm_out_file);
      if ((ix86_harden_sls & harden_sls_indirect_jmp))
	fputs ("\tint3\n", asm_out_file);
    }
  else
    output_indirect_thunk (regno);
}

/* Output indirect branch via a call and return thunk.  CALL_OP is a
   register which contains the branch target.  XASM is the assembly
   template for CALL_OP.  Branch is a tail call if SIBCALL_P is true.
   A normal call is converted to:

	call __x86_indirect_thunk_reg

   and a tail call is converted to:

	jmp __x86_indirect_thunk_reg
 */

static void
ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
{
  char thunk_name_buf[32];
  char *thunk_name;
  enum indirect_thunk_prefix need_prefix
    = indirect_thunk_need_prefix (current_output_insn);
  int regno = REGNO (call_op);

  if (cfun->machine->indirect_branch_type
      != indirect_branch_thunk_inline)
    {
      if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
	SET_HARD_REG_BIT (indirect_thunks_used, regno);

      indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
      thunk_name = thunk_name_buf;
    }
  else
    thunk_name = NULL;

  if (sibcall_p)
    ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
  else
    {
      if (thunk_name != NULL)
	{
	  if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
	      && ix86_indirect_branch_cs_prefix)
	    fprintf (asm_out_file, "\tcs\n");
	  fprintf (asm_out_file, "\tcall\t");
	  assemble_name (asm_out_file, thunk_name);
	  putc ('\n', asm_out_file);
	  return;
	}

      char indirectlabel1[32];
      char indirectlabel2[32];

      ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
				   INDIRECT_LABEL,
				   indirectlabelno++);
      ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
				   INDIRECT_LABEL,
				   indirectlabelno++);

      /* Jump.  */
      fputs ("\tjmp\t", asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel2);
      fputc ('\n', asm_out_file);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

      /* Call.  */
      fputs ("\tcall\t", asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel1);
      fputc ('\n', asm_out_file);
    }
}
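
/* For a non-sibling call with an inline thunk (thunk_name == NULL) the
   emitted shape is therefore (label names are illustrative):

	jmp	.LIND2
   .LIND1:
	<inline thunk body for the target register>
   .LIND2:
	call	.LIND1

   so the call pushes its return address and control reaches the thunk
   without the processor speculating through an indirect branch.  */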

/* Output indirect branch via a call and return thunk.  CALL_OP is
   the branch target.  XASM is the assembly template for CALL_OP.
   Branch is a tail call if SIBCALL_P is true.  A normal call is
   converted to:

	push CALL_OP
	jmp __x86_indirect_thunk

   and a tail call is converted to:

	push CALL_OP
	jmp __x86_indirect_thunk
 */

static void
ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
				      bool sibcall_p)
{
  char thunk_name_buf[32];
  char *thunk_name;
  char push_buf[64];
  enum indirect_thunk_prefix need_prefix
    = indirect_thunk_need_prefix (current_output_insn);
  int regno = -1;

  if (cfun->machine->indirect_branch_type
      != indirect_branch_thunk_inline)
    {
      if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
	indirect_thunk_needed = true;
      indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
      thunk_name = thunk_name_buf;
    }
  else
    thunk_name = NULL;

  snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
	    TARGET_64BIT ? 'q' : 'l', xasm);

  if (sibcall_p)
    {
      output_asm_insn (push_buf, &call_op);
      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
    }
  else
    {
      char indirectlabel1[32];
      char indirectlabel2[32];

      ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
				   INDIRECT_LABEL,
				   indirectlabelno++);
      ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
				   INDIRECT_LABEL,
				   indirectlabelno++);

      /* Jump.  */
      fputs ("\tjmp\t", asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel2);
      fputc ('\n', asm_out_file);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

      /* An external function may be called via GOT, instead of PLT.  */
      if (MEM_P (call_op))
	{
	  struct ix86_address parts;
	  rtx addr = XEXP (call_op, 0);
	  if (ix86_decompose_address (addr, &parts)
	      && parts.base == stack_pointer_rtx)
	    {
	      /* Since call will adjust stack by -UNITS_PER_WORD,
		 we must convert "disp(stack, index, scale)" to
		 "disp+UNITS_PER_WORD(stack, index, scale)".  */
	      if (parts.index)
		{
		  addr = gen_rtx_MULT (Pmode, parts.index,
				       GEN_INT (parts.scale));
		  addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				       addr);
		}
	      else
		addr = stack_pointer_rtx;

	      rtx disp;
	      if (parts.disp != NULL_RTX)
		disp = plus_constant (Pmode, parts.disp,
				      UNITS_PER_WORD);
	      else
		disp = GEN_INT (UNITS_PER_WORD);

	      addr = gen_rtx_PLUS (Pmode, addr, disp);
	      call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
	    }
	}

      output_asm_insn (push_buf, &call_op);

      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

      /* Call.  */
      fputs ("\tcall\t", asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel1);
      fputc ('\n', asm_out_file);
    }
}

/* Output indirect branch via a call and return thunk.  CALL_OP is
   the branch target.  XASM is the assembly template for CALL_OP.
   Branch is a tail call if SIBCALL_P is true.  */

static void
ix86_output_indirect_branch (rtx call_op, const char *xasm,
			     bool sibcall_p)
{
  if (REG_P (call_op))
    ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
  else
    ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
}

/* Output indirect jump.  CALL_OP is the jump target.  */

const char *
ix86_output_indirect_jmp (rtx call_op)
{
  if (cfun->machine->indirect_branch_type != indirect_branch_keep)
    {
      /* We can't have a red-zone since "call" in the indirect thunk
	 pushes the return address onto the stack, destroying the red-zone.  */
      if (ix86_red_zone_used)
	gcc_unreachable ();

      ix86_output_indirect_branch (call_op, "%0", true);
      return "";
    }
  else
    output_asm_insn ("%!jmp\t%A0", &call_op);
  return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "";
}

/* Output return instrumentation for the current function if needed.  */

static void
output_return_instrumentation (void)
{
  if (ix86_instrument_return != instrument_return_none
      && flag_fentry
      && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
    {
      if (ix86_flag_record_return)
	fprintf (asm_out_file, "1:\n");
      switch (ix86_instrument_return)
	{
	case instrument_return_call:
	  fprintf (asm_out_file, "\tcall\t__return__\n");
	  break;
	case instrument_return_nop5:
	  /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1)  */
	  fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
	  break;
	case instrument_return_none:
	  break;
	}

      if (ix86_flag_record_return)
	{
	  fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
	  fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
	  fprintf (asm_out_file, "\t.previous\n");
	}
    }
}

/* Output function return.  Add a REP prefix to RET if LONG_P is true
   and the function return is kept.  */

const char *
ix86_output_function_return (bool long_p)
{
  output_return_instrumentation ();

  if (cfun->machine->function_return_type != indirect_branch_keep)
    {
      char thunk_name[32];
      enum indirect_thunk_prefix need_prefix
	= indirect_thunk_need_prefix (current_output_insn);

      if (cfun->machine->function_return_type
	  != indirect_branch_thunk_inline)
	{
	  bool need_thunk = (cfun->machine->function_return_type
			     == indirect_branch_thunk);
	  indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
			       true);
	  indirect_return_needed |= need_thunk;
	  fprintf (asm_out_file, "\tjmp\t");
	  assemble_name (asm_out_file, thunk_name);
	  putc ('\n', asm_out_file);
	}
      else
	output_indirect_thunk (INVALID_REGNUM);

      return "";
    }

  output_asm_insn (long_p ? "rep%; ret" : "ret", nullptr);
  return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
}

/* Output indirect function return.  RET_OP is the function return
   target.  */

const char *
ix86_output_indirect_function_return (rtx ret_op)
{
  if (cfun->machine->function_return_type != indirect_branch_keep)
    {
      char thunk_name[32];
      enum indirect_thunk_prefix need_prefix
	= indirect_thunk_need_prefix (current_output_insn);
      unsigned int regno = REGNO (ret_op);
      gcc_assert (regno == CX_REG);

      if (cfun->machine->function_return_type
	  != indirect_branch_thunk_inline)
	{
	  bool need_thunk = (cfun->machine->function_return_type
			     == indirect_branch_thunk);
	  indirect_thunk_name (thunk_name, regno, need_prefix, true);

	  if (need_thunk)
	    {
	      indirect_return_via_cx = true;
	      SET_HARD_REG_BIT (indirect_thunks_used, CX_REG);
	    }
	  fprintf (asm_out_file, "\tjmp\t");
	  assemble_name (asm_out_file, thunk_name);
	  putc ('\n', asm_out_file);
	}
      else
	output_indirect_thunk (regno);
    }
  else
    {
      output_asm_insn ("%!jmp\t%A0", &ret_op);
      if (ix86_harden_sls & harden_sls_indirect_jmp)
	fputs ("\tint3\n", asm_out_file);
    }

  return "";
}

/* Output the assembly for a call instruction.  */

const char *
ix86_output_call_insn (rtx_insn *insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  bool output_indirect_p
    = (!TARGET_SEH
       && cfun->machine->indirect_branch_type != indirect_branch_keep);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      output_return_instrumentation ();
      if (direct_p)
	{
	  if (ix86_nopic_noplt_attribute_p (call_op))
	    {
	      direct_p = false;
	      if (TARGET_64BIT)
		{
		  if (output_indirect_p)
		    xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
		  else
		    xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
		}
	      else
		{
		  if (output_indirect_p)
		    xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
		  else
		    xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
		}
	    }
	  else
	    xasm = "%!jmp\t%P0";
	}
      /* SEH epilogue detection requires the indirect branch case
	 to include REX.W.  */
      else if (TARGET_SEH)
	xasm = "%!rex.W jmp\t%A0";
      else
	{
	  if (output_indirect_p)
	    xasm = "%0";
	  else
	    xasm = "%!jmp\t%A0";
	}

      if (output_indirect_p && !direct_p)
	ix86_output_indirect_branch (call_op, xasm, true);
      else
	{
	  output_asm_insn (xasm, &call_op);
	  if (!direct_p
	      && (ix86_harden_sls & harden_sls_indirect_jmp))
	    return "int3";
	}
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx_insn *i;

      for (i = NEXT_INSN (insn); i; i = NEXT_INSN (i))
	{
	  /* Prevent a catch region from being adjacent to a jump that would
	     be interpreted as an epilogue sequence by the unwinder.  */
	  if (JUMP_P (i) && CROSSING_JUMP_P (i))
	    {
	      seh_nop_p = true;
	      break;
	    }

	  /* If we get to another real insn, we don't need the nop.  */
	  if (INSN_P (i))
	    break;

	  /* If we get to the epilogue note, prevent a catch region from
	     being adjacent to the standard epilogue sequence.  Note that,
	     if non-call exceptions are enabled, we already did it during
	     epilogue expansion, or else, if the insn can throw internally,
	     we already did it during the reorg pass.  */
	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
	      && !flag_non_call_exceptions
	      && !can_throw_internal (insn))
	    {
	      seh_nop_p = true;
	      break;
	    }
	}

      /* If we didn't find a real insn following the call, prevent the
	 unwinder from looking into the next function.  */
      if (i == NULL)
	seh_nop_p = true;
    }

  if (direct_p)
    {
      if (ix86_nopic_noplt_attribute_p (call_op))
	{
	  direct_p = false;
	  if (TARGET_64BIT)
	    {
	      if (output_indirect_p)
		xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
	      else
		xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
	    }
	  else
	    {
	      if (output_indirect_p)
		xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
	      else
		xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
	    }
	}
      else
	xasm = "%!call\t%P0";
    }
  else
    {
      if (output_indirect_p)
	xasm = "%0";
      else
	xasm = "%!call\t%A0";
    }

  if (output_indirect_p && !direct_p)
    ix86_output_indirect_branch (call_op, xasm, false);
  else
    output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  int align = 0;
  /* For DImode with SLOT_FLOATxFDI_387 use 32-bit
     alignment with -m32 -mpreferred-stack-boundary=2.  */
  if (mode == DImode
      && !TARGET_64BIT
      && n == SLOT_FLOATxFDI_387
      && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode))
    align = 32;

  s = ggc_alloc<stack_local_entry> ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (copy_rtx (s->rtl));
}

static void
ix86_instantiate_decls (void)
{
  struct stack_local_entry *s;

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->rtl != NULL_RTX)
      instantiate_decl_rtl (s->rtl);
}

/* Check whether x86 address PARTS is a pc-relative address.  */

bool
ix86_rip_relative_addr_p (struct ix86_address *parts)
{
  rtx base, index, disp;

  base = parts->base;
  index = parts->index;
  disp = parts->disp;

  if (disp && !base && !index)
    {
      if (TARGET_64BIT)
	{
	  rtx symbol = disp;

	  if (GET_CODE (disp) == CONST)
	    symbol = XEXP (disp, 0);
	  if (GET_CODE (symbol) == PLUS
	      && CONST_INT_P (XEXP (symbol, 1)))
	    symbol = XEXP (symbol, 0);

	  if (GET_CODE (symbol) == LABEL_REF
	      || (GET_CODE (symbol) == SYMBOL_REF
		  && SYMBOL_REF_TLS_MODEL (symbol) == 0)
	      || (GET_CODE (symbol) == UNSPEC
		  && (XINT (symbol, 1) == UNSPEC_GOTPCREL
		      || XINT (symbol, 1) == UNSPEC_PCREL
		      || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
	    return true;
	}
    }
  return false;
}

/* Calculate the length of the memory address in the instruction encoding.
   Includes the addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  We never generate an addr32 prefix for LEA insns.  */

int
memory_address_length (rtx addr, bool lea)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;

  /* If this is not an LEA instruction, add the length of the addr32
     prefix.  */
  if (TARGET_64BIT && !lea
      && (SImode_address_operand (addr, VOIDmode)
	  || (parts.base && GET_MODE (parts.base) == SImode)
	  || (parts.index && GET_MODE (parts.index) == SImode)))
    len++;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  if (base && SUBREG_P (base))
    base = SUBREG_REG (base);
  if (index && SUBREG_P (index))
    index = SUBREG_REG (index);

  gcc_assert (base == NULL_RTX || REG_P (base));
  gcc_assert (index == NULL_RTX || REG_P (index));

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
	 code.  */
      if (base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || REGNO (base) == SP_REG
	  || REGNO (base) == BP_REG
	  || REGNO (base) == R12_REG
	  || REGNO (base) == R13_REG)
	len++;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     a SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (!ix86_rip_relative_addr_p (&parts))
	len++;
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  if (base && satisfies_constraint_K (disp))
	    len += 1;
	  else
	    len += 4;
	}
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
	len++;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp (or r12), which always wants an index.  */
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
	len++;
    }

  return len;
}
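
/* Worked examples (AT&T syntax, lengths beyond the one-byte modrm):
   "(%ebp)" needs a disp8 even with a zero displacement, so len == 1;
   "(%esp)" needs a SIB byte, len == 1; "8(%ebx,%esi,2)" has a SIB byte
   plus a disp8 that satisfies constraint K, len == 2; a bare 64-bit
   absolute "symbol" that is not rip-relative needs disp32 plus a SIB
   byte, len == 5.  */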

/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */

int
ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
{
  int len = 0;
  int i;

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	enum attr_mode mode = get_attr_mode (insn);

	gcc_assert (!len);
	if (shortform && CONST_INT_P (recog_data.operand[i]))
	  {
	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
	    switch (mode)
	      {
	      case MODE_QI:
		len = 1;
		continue;
	      case MODE_HI:
		ival = trunc_int_for_mode (ival, HImode);
		break;
	      case MODE_SI:
		ival = trunc_int_for_mode (ival, SImode);
		break;
	      default:
		break;
	      }
	    if (IN_RANGE (ival, -128, 127))
	      {
		len = 1;
		continue;
	      }
	  }
	switch (mode)
	  {
	  case MODE_QI:
	    len = 1;
	    break;
	  case MODE_HI:
	    len = 2;
	    break;
	  case MODE_SI:
	    len = 4;
	    break;
	  /* Immediates for DImode instructions are encoded
	     as 32-bit sign extended values.  */
	  case MODE_DI:
	    len = 4;
	    break;
	  default:
	    fatal_insn ("unknown insn mode", insn);
	  }
      }
  return len;
}
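
/* Worked example: "add $100, %eax" with a short-form alternative has
   100 in [-128, 127], so the immediate length is 1; "add $1000, %eax"
   needs the full 4-byte SImode immediate.  */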

/* Compute the default value for the "length_address" attribute.  */

int
ix86_attr_length_address_default (rtx_insn *insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
	set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);

      return memory_address_length (addr, true);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    {
      rtx op = recog_data.operand[i];
      if (MEM_P (op))
	{
	  constrain_operands_cached (insn, reload_completed);
	  if (which_alternative != -1)
	    {
	      const char *constraints = recog_data.constraints[i];
	      int alt = which_alternative;

	      while (*constraints == '=' || *constraints == '+')
		constraints++;
	      while (alt-- > 0)
		while (*constraints++ != ',')
		  ;
	      /* Skip ignored operands.  */
	      if (*constraints == 'X')
		continue;
	    }

	  int len = memory_address_length (XEXP (op, 0), false);

	  /* Account for the segment prefix for non-default addr spaces.  */
	  if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
	    len++;

	  return len;
	}
    }
  return 0;
}

/* Compute the default value for the "length_vex" attribute.  It includes
   the 2- or 3-byte VEX prefix and 1 opcode byte.  */

int
ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
			      bool has_vex_w)
{
  int i, reg_only = 2 + 1;
  bool has_mem = false;

  /* Only the 0f opcode can use the 2-byte VEX prefix and the VEX W bit
     uses the 3-byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use the 2-byte VEX prefix in 32-bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
	/* The REX.W bit uses the 3-byte VEX prefix.
	   REX2 with vex uses the extended EVEX prefix; length is 4 bytes.  */
	if (GET_MODE (recog_data.operand[i]) == DImode
	    && GENERAL_REG_P (recog_data.operand[i]))
	  return 3 + 1;

	/* The REX.B bit requires 3-byte VEX.  Right here we don't know which
	   operand will be encoded using VEX.B, so be conservative.
	   REX2 with vex uses the extended EVEX prefix; length is 4 bytes.  */
	if (REX_INT_REGNO_P (recog_data.operand[i])
	    || REX2_INT_REGNO_P (recog_data.operand[i])
	    || REX_SSE_REGNO_P (recog_data.operand[i]))
	  reg_only = 3 + 1;
      }
    else if (MEM_P (recog_data.operand[i]))
      {
	/* REX2.X or REX2.B bits use the extended EVEX prefix.  */
	if (x86_extended_rex2reg_mentioned_p (recog_data.operand[i]))
	  return 4;

	/* REX.X or REX.B bits use the 3-byte VEX prefix.  */
	if (x86_extended_reg_mentioned_p (recog_data.operand[i]))
	  return 3 + 1;

	has_mem = true;
      }

  return has_mem ? 2 + 1 : reg_only;
}
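
/* Worked example: "vaddps %xmm1, %xmm2, %xmm0" touches only low
   registers, so the 2-byte VEX prefix suffices and the value is
   2 + 1 == 3; a register-only insn mentioning %xmm8..%xmm15 must
   conservatively assume the 3-byte prefix, 3 + 1 == 4.  */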
static bool
ix86_class_likely_spilled_p (reg_class_t);

/* Return true if the lhs of INSN is a HW function argument register, and
   set *IS_SPILLED to true if it is a likely spilled HW register.  */

static bool
insn_is_function_arg (rtx insn, bool *is_spilled)
{
  rtx dst;

  if (!NONDEBUG_INSN_P (insn))
    return false;
  /* Call instructions are not movable, ignore them.  */
  if (CALL_P (insn))
    return false;
  insn = PATTERN (insn);
  if (GET_CODE (insn) == PARALLEL)
    insn = XVECEXP (insn, 0, 0);
  if (GET_CODE (insn) != SET)
    return false;
  dst = SET_DEST (insn);
  if (REG_P (dst) && HARD_REGISTER_P (dst)
      && ix86_function_arg_regno_p (REGNO (dst)))
    {
      /* Is it a likely spilled HW register?  */
      if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
	  && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
	*is_spilled = true;
      return true;
    }
  return false;
}

/* Add output dependencies for a chain of adjacent function arguments, but
   only if there is a move to a likely spilled HW register.  Return the
   first argument if at least one dependence was added, or NULL otherwise.  */

static rtx_insn *
add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
{
  rtx_insn *insn;
  rtx_insn *last = call;
  rtx_insn *first_arg = NULL;
  bool is_spilled = false;

  head = PREV_INSN (head);

  /* Find the argument passing instruction nearest to the call.  */
  while (true)
    {
      last = PREV_INSN (last);
      if (last == head)
	return NULL;
      if (!NONDEBUG_INSN_P (last))
	continue;
      if (insn_is_function_arg (last, &is_spilled))
	break;
      return NULL;
    }

  first_arg = last;
  while (true)
    {
      insn = PREV_INSN (last);
      if (!INSN_P (insn))
	break;
      if (insn == head)
	break;
      if (!NONDEBUG_INSN_P (insn))
	{
	  last = insn;
	  continue;
	}
      if (insn_is_function_arg (insn, &is_spilled))
	{
	  /* Add an output dependence between two function arguments if the
	     chain of output arguments contains likely spilled HW
	     registers.  */
	  if (is_spilled)
	    add_dependence (first_arg, insn, REG_DEP_OUTPUT);
	  first_arg = last = insn;
	}
      else
	break;
    }
  if (!is_spilled)
    return NULL;
  return first_arg;
}

/* Add an output or anti dependency from INSN to FIRST_ARG to restrict its
   code motion.  */

static void
avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
{
  rtx set;
  rtx tmp;

  set = single_set (insn);
  if (!set)
    return;
  tmp = SET_DEST (set);
  if (REG_P (tmp))
    {
      /* Add an output dependency to the first function argument.  */
      add_dependence (first_arg, insn, REG_DEP_OUTPUT);
      return;
    }
  /* Add an anti dependency.  */
  add_dependence (first_arg, insn, REG_DEP_ANTI);
}

/* Avoid cross block motion of a function argument by adding a dependency
   from the first non-jump instruction in BB.  */

static void
add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
{
  rtx_insn *insn = BB_END (bb);

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
	{
	  rtx set = single_set (insn);
	  if (set)
	    {
	      avoid_func_arg_motion (arg, insn);
	      return;
	    }
	}
      if (insn == BB_HEAD (bb))
	return;
      insn = PREV_INSN (insn);
    }
}

/* Hook for pre-reload schedule - avoid motion of function arguments
   passed in likely spilled HW registers.  */

static void
ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
{
  rtx_insn *insn;
  rtx_insn *first_arg = NULL;
  if (reload_completed)
    return;
  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (head);
  for (insn = tail; insn != head; insn = PREV_INSN (insn))
    if (INSN_P (insn) && CALL_P (insn))
      {
	first_arg = add_parameter_dependencies (insn, head);
	if (first_arg)
	  {
	    /* Add a dependee for the first argument to predecessors only
	       if the region contains more than one block.  */
	    basic_block bb = BLOCK_FOR_INSN (insn);
	    int rgn = CONTAINING_RGN (bb->index);
	    int nr_blks = RGN_NR_BLOCKS (rgn);
	    /* Skip trivial regions and region head blocks that can have
	       predecessors outside of the region.  */
	    if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
	      {
		edge e;
		edge_iterator ei;

		/* Regions are SCCs with the exception of selective
		   scheduling with pipelining of outer blocks enabled.
		   So also check that immediate predecessors of a non-head
		   block are in the same region.  */
		FOR_EACH_EDGE (e, ei, bb->preds)
		  {
		    /* Avoid creating loop-carried dependencies by
		       using topological ordering in the region.  */
		    if (rgn == CONTAINING_RGN (e->src->index)
			&& BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
		      add_dependee_for_func_arg (first_arg, e->src);
		  }
	      }
	    insn = first_arg;
	    if (insn == head)
	      break;
	  }
      }
    else if (first_arg)
      avoid_func_arg_motion (first_arg, insn);
}

/* Hook for pre-reload schedule - set the priority of moves from likely
   spilled HW registers to maximum, to schedule them as soon as possible.
   These are moves from function argument registers at the top of the
   function entry and moves from function return value registers after a
   call.  */

static int
ix86_adjust_priority (rtx_insn *insn, int priority)
{
  rtx set;

  if (reload_completed)
    return priority;

  if (!NONDEBUG_INSN_P (insn))
    return priority;

  set = single_set (insn);
  if (set)
    {
      rtx tmp = SET_SRC (set);
      if (REG_P (tmp)
	  && HARD_REGISTER_P (tmp)
	  && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
	  && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
	return current_sched_info->sched_max_insns_priority;
    }

  return priority;
}

/* Prepare for the scheduling pass.  */

static void
ix86_sched_init_global (FILE *, int, int)
{
  /* Install scheduling hooks for the current CPU.  Some of these hooks are
     used in time-critical parts of the scheduler, so we only set them up
     when they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2:
    case PROCESSOR_NEHALEM:
    case PROCESSOR_SANDYBRIDGE:
    case PROCESSOR_HASWELL:
    case PROCESSOR_TREMONT:
    case PROCESSOR_ALDERLAKE:
    case PROCESSOR_GENERIC:
      /* Do not perform multipass scheduling for the pre-reload schedule
	 to save compile time.  */
      if (reload_completed)
	{
	  ix86_core2i7_init_hooks ();
	  break;
	}
      /* Fall through.  */
    default:
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}

/* Implement TARGET_STATIC_RTX_ALIGNMENT.  */

static HOST_WIDE_INT
ix86_static_rtx_alignment (machine_mode mode)
{
  if (mode == DFmode)
    return 64;
  if (ALIGN_MODE_128 (mode))
    return MAX (128, GET_MODE_ALIGNMENT (mode));
  return GET_MODE_ALIGNMENT (mode);
}

/* Implement TARGET_CONSTANT_ALIGNMENT.  */

static HOST_WIDE_INT
ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
      HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
      return MAX (mode_align, align);
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}

/* Implement TARGET_EMPTY_RECORD_P.  */

static bool
ix86_is_empty_record (const_tree type)
{
  if (!TARGET_64BIT)
    return false;
  return default_is_empty_record (type);
}

/* Implement TARGET_WARN_PARAMETER_PASSING_ABI.  */

static void
ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (!cum->warn_empty)
    return;

  if (!TYPE_EMPTY_P (type))
    return;

  /* Don't warn if the function isn't visible outside of the TU.  */
  if (cum->decl && !TREE_PUBLIC (cum->decl))
    return;

  const_tree ctx = get_ultimate_context (cum->decl);
  if (ctx != NULL_TREE
      && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
    return;

  /* If the actual size of the type is zero, then there is no change
     in how objects of this size are passed.  */
  if (int_size_in_bytes (type) == 0)
    return;

  warning (OPT_Wabi, "empty class %qT parameter passing ABI "
	   "changes in %<-fabi-version=12%> (GCC 8)", type);

  /* Only warn once.  */
  cum->warn_empty = false;
}

/* This hook returns the name of the multilib ABI.  */

static const char *
ix86_get_multilib_abi_name (void)
{
  if (!(TARGET_64BIT_P (ix86_isa_flags)))
    return "i386";
  else if (TARGET_X32_P (ix86_isa_flags))
    return "x32";
  else
    return "x86_64";
}

/* Compute the alignment for a variable for the Intel MCU psABI.  TYPE is
   the data type, and ALIGN is the alignment that the object would
   ordinarily have.  */

static int
iamcu_alignment (tree type, int align)
{
  machine_mode mode;

  if (align < 32 || TYPE_USER_ALIGN (type))
    return align;

  /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
     bytes.  */
  type = strip_array_types (type);
  if (TYPE_ATOMIC (type))
    return align;

  mode = TYPE_MODE (type);
  switch (GET_MODE_CLASS (mode))
    {
    case MODE_INT:
    case MODE_COMPLEX_INT:
    case MODE_COMPLEX_FLOAT:
    case MODE_FLOAT:
    case MODE_DECIMAL_FLOAT:
      return 32;
    default:
      return align;
    }
}

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

unsigned int
ix86_data_alignment (tree type, unsigned int align, bool opt)
{
  /* GCC 4.8 and earlier used to incorrectly assume this alignment even
     for symbols from other compilation units or symbols that don't need
     to bind locally.  In order to preserve some ABI compatibility with
     those compilers, ensure we don't decrease alignment from what we
     used to assume.  */

  unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);

  /* A data structure, equal or greater than the size of a cache line
     (64 bytes in the Pentium 4 and other recent Intel processors, including
     processors based on Intel Core microarchitecture) should be aligned
     so that its base address is a multiple of a cache line size.  */

  unsigned int max_align
    = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);

  if (max_align < BITS_PER_WORD)
    max_align = BITS_PER_WORD;

  switch (ix86_align_data_type)
    {
    case ix86_align_data_type_abi: opt = false; break;
    case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
    case ix86_align_data_type_cacheline: break;
    }

  if (TARGET_IAMCU)
    align = iamcu_alignment (type, align);

  if (opt
      && AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
    {
      if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
	  && align < max_align_compat)
	align = max_align_compat;
      if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
	  && align < max_align)
	align = max_align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
	  && align < 128)
	return 128;
    }

  if (!opt)
    return align;

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if (RECORD_OR_UNION_TYPE_P (type)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
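
/* Worked example (assuming a 64-byte prefetch block, so max_align is
   512 bits): a static "char buf[100]" is an aggregate of 800 bits,
   which exceeds both max_align_compat (256) and max_align, so with OPT
   it is aligned to 512 bits; a 20-byte array merely triggers the
   x86-64 rule raising arrays of 16 bytes or more to 128 bits.  */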

/* Implement TARGET_LOWER_LOCAL_DECL_ALIGNMENT.  */

static void
ix86_lower_local_decl_alignment (tree decl)
{
  unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
						 DECL_ALIGN (decl), true);
  if (new_align < DECL_ALIGN (decl))
    SET_DECL_ALIGN (decl, new_align);
}

/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, machine_mode mode,
		      unsigned int align, bool may_lower)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (may_lower
      && !TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || (!TYPE_USER_ALIGN (type)
		    && !TYPE_ATOMIC (strip_array_types (type))))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for a caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* Don't increase alignment for Intel MCU psABI.  */
  if (TARGET_IAMCU)
    return align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions at arrays.  This
     rule is meant for static storage (where the compiler cannot do the
     analysis by itself).  We follow it for automatic variables only when
     convenient.  We fully control everything in the function compiled and
     functions from other units cannot rely on the alignment.

     Exclude the va_list type.  It is the common case of a local array where
     we cannot benefit from the alignment.

     TODO: Probably one should optimize for size only when var is not
     escaping.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
	  && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if (RECORD_OR_UNION_TYPE_P (type)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
/* Compute the minimum required alignment for dynamic stack realignment
   purposes for a local variable, parameter or a stack slot.  EXP is
   the data type or decl itself, MODE is its mode and ALIGN is the
   alignment that the object would ordinarily have.  */

unsigned int
ix86_minimum_alignment (tree exp, machine_mode mode,
			unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || (!TYPE_USER_ALIGN (type)
		    && !TYPE_ATOMIC (strip_array_types (type))))
      && (!decl || !DECL_USER_ALIGN (decl)))
    {
      gcc_checking_assert (!TARGET_STV);
      return 32;
    }

  return align;
}
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.  */

static rtx
ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
{
  unsigned regno;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      const_tree fntype, fndecl;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
	{
	  fntype = TREE_TYPE (fndecl_or_type);
	  fndecl = fndecl_or_type;
	}
      else
	{
	  fntype = fndecl_or_type;
	  fndecl = NULL;
	}

      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.
	     Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  /* Thiscall functions use ecx for arguments, which leaves
	     us with EAX and EDX for the static chain.
	     We are using for abi-compatibility EAX.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (fntype, fndecl) == 3)
	{
	  /* For regparm 3, we have no free call-clobbered registers in
	     which to store the static chain.  In order to implement this,
	     we have the trampoline push the static chain to the stack.
	     However, we can't push a value below the return address when
	     we call the nested function directly, so we have to use an
	     alternate entry point.  For this we use ESI, and have the
	     alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
	      if (fndecl == current_function_decl
		  && !ix86_static_chain_on_stack)
		{
		  gcc_assert (!reload_completed);
		  ix86_static_chain_on_stack = true;
		}
	      return gen_frame_mem (SImode,
				    plus_constant (Pmode,
						   arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;
  bool need_endbr = (flag_cf_protection & CF_BRANCH);

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      if (need_endbr)
	{
	  /* Insert ENDBR64.  */
	  mem = adjust_address (m_tramp, SImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
	  offset += 4;
	}

      /* Load the function address to r11.  Try to load address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  FNADDR is a 32bit address and may not be in
	 DImode when ptr_mode == SImode.  Always use movl in this
	 case.  */
      if (ptr_mode == SImode
	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_addr_to_reg (fnaddr);

	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);
	  offset += 10;
	}

      /* Load static chain using movabs to r10.  Use the shorter movl
	 instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
	{
	  opcode = 0xba41;
	  size = 6;
	}
      else
	{
	  opcode = 0xba49;
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
	{
	  switch (REGNO (chain))
	    {
	    case AX_REG:
	      opcode = 0xb8; break;
	    case CX_REG:
	      opcode = 0xb9; break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	opcode = 0x68;

      if (need_endbr)
	{
	  /* Insert ENDBR32.  */
	  mem = adjust_address (m_tramp, SImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
	  offset += 4;
	}

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      int skip = MEM_P (chain) ? 1 : 0;
      /* Skip ENDBR32 at the entry of the target function.  */
      if (need_endbr
	  && !cgraph_node::get (fndecl)->only_called_directly_p ())
	skip += 4;
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (Pmode, XEXP (m_tramp, 0),
					  offset - skip),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif
}
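/* For reference, the immediates stored above decode (little-endian) as:
     0xfa1e0ff3 -> f3 0f 1e fa	endbr64
     0xbb41	-> 41 bb imm32	movl  $fnaddr, %r11d
     0xbb49	-> 49 bb imm64	movabsq $fnaddr, %r11
     0x90e3ff49 -> 49 ff e3 90	jmp *%r11; nop
     0xfb1e0ff3 -> f3 0f 1e fb	endbr32 (32-bit variant).  */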
static bool
ix86_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !ix86_function_naked (current_function_decl);
}

static bool
ix86_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return !ix86_function_naked (decl);
}
/* Return the shift count of a vector by scalar shift builtin second argument
   ARG1.  */
static tree
ix86_vector_shift_count (tree arg1)
{
  if (tree_fits_uhwi_p (arg1))
    return arg1;
  else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
    {
      /* The count argument is weird, passed in as various 128-bit
	 (or 64-bit) vectors, the low 64 bits from it are the count.  */
      unsigned char buf[16];
      int len = native_encode_expr (arg1, buf, 16);
      if (len == 0)
	return NULL_TREE;
      tree t = native_interpret_expr (uint64_type_node, buf, len);
      if (t && tree_fits_uhwi_p (t))
	return t;
    }
  return NULL_TREE;
}
/* Return true if arg_mask is all ones, ELEMS is elements number of
   corresponding vector.  */
static bool
ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask)
{
  if (TREE_CODE (arg_mask) != INTEGER_CST)
    return false;

  unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask);
  if (elems == HOST_BITS_PER_WIDE_INT)
    return mask == HOST_WIDE_INT_M1U;
  if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
    return false;

  return true;
}
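/* E.g. for ELEMS == 4, any mask whose low four bits are 0b1111 counts as
   all ones; bits above ELEMS are ignored both here and by the hardware.  */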
static tree
ix86_fold_builtin (tree fndecl, int n_args,
		   tree *args, bool ignore ATTRIBUTE_UNUSED)
{
  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
    {
      enum ix86_builtins fn_code
	= (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
      enum rtx_code rcode;
      bool is_vshift;
      unsigned HOST_WIDE_INT mask;

      switch (fn_code)
	{
	case IX86_BUILTIN_CPU_IS:
	case IX86_BUILTIN_CPU_SUPPORTS:
	  gcc_assert (n_args == 1);
	  return fold_builtin_cpu (fndecl, args);

	case IX86_BUILTIN_NANQ:
	case IX86_BUILTIN_NANSQ:
	  {
	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
	    const char *str = c_getstr (*args);
	    int quiet = fn_code == IX86_BUILTIN_NANQ;
	    REAL_VALUE_TYPE real;

	    if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
	      return build_real (type, real);
	    return NULL_TREE;
	  }

	case IX86_BUILTIN_INFQ:
	case IX86_BUILTIN_HUGE_VALQ:
	  {
	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
	    REAL_VALUE_TYPE inf;
	    real_inf (&inf);
	    return build_real (type, inf);
	  }

	case IX86_BUILTIN_TZCNT16:
	case IX86_BUILTIN_CTZS:
	case IX86_BUILTIN_TZCNT32:
	case IX86_BUILTIN_TZCNT64:
	  gcc_assert (n_args == 1);
	  if (TREE_CODE (args[0]) == INTEGER_CST)
	    {
	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
	      tree arg = args[0];
	      if (fn_code == IX86_BUILTIN_TZCNT16
		  || fn_code == IX86_BUILTIN_CTZS)
		arg = fold_convert (short_unsigned_type_node, arg);
	      if (integer_zerop (arg))
		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
	      else
		return fold_const_call (CFN_CTZ, type, arg);
	    }
	  break;

	case IX86_BUILTIN_LZCNT16:
	case IX86_BUILTIN_CLZS:
	case IX86_BUILTIN_LZCNT32:
	case IX86_BUILTIN_LZCNT64:
	  gcc_assert (n_args == 1);
	  if (TREE_CODE (args[0]) == INTEGER_CST)
	    {
	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
	      tree arg = args[0];
	      if (fn_code == IX86_BUILTIN_LZCNT16
		  || fn_code == IX86_BUILTIN_CLZS)
		arg = fold_convert (short_unsigned_type_node, arg);
	      if (integer_zerop (arg))
		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
	      else
		return fold_const_call (CFN_CLZ, type, arg);
	    }
	  break;
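	/* Unlike bsf/bsr, tzcnt/lzcnt are well defined for a zero input:
	   e.g. a 32-bit tzcnt of zero is 32, which is exactly the
	   TYPE_PRECISION value the folds above produce.  */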
	case IX86_BUILTIN_BEXTR32:
	case IX86_BUILTIN_BEXTR64:
	case IX86_BUILTIN_BEXTRI32:
	case IX86_BUILTIN_BEXTRI64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[1]))
	    {
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
	      unsigned int start = tree_to_uhwi (args[1]);
	      unsigned int len = (start & 0xff00) >> 8;
	      start &= 0xff;
	      if (start >= prec || len == 0)
		res = 0;
	      else if (!tree_fits_uhwi_p (args[0]))
		break;
	      else
		res = tree_to_uhwi (args[0]) >> start;
	      if (len > prec)
		len = prec;
	      if (len < HOST_BITS_PER_WIDE_INT)
		res &= (HOST_WIDE_INT_1U << len) - 1;
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;
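	/* BEXTR's second operand packs the bit field as START in bits [7:0]
	   and LEN in bits [15:8]; e.g. extracting 4 bits starting at bit 8
	   uses 0x0408, so bextr (0xabcd, 0x0408) == 0xb.  */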
	case IX86_BUILTIN_BZHI32:
	case IX86_BUILTIN_BZHI64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[1]))
	    {
	      unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
	      if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
		return args[0];
	      if (idx == 0)
		return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0);
	      if (!tree_fits_uhwi_p (args[0]))
		break;
	      unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
	      res &= ~(HOST_WIDE_INT_M1U << idx);
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;
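	/* BZHI zeroes the bits at and above the index taken from the low
	   byte of the second operand, e.g. bzhi (0xff, 4) == 0xf; an index
	   of at least the precision returns the first operand unchanged.  */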
	case IX86_BUILTIN_PDEP32:
	case IX86_BUILTIN_PDEP64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
	    {
	      unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
	      unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned HOST_WIDE_INT m, k = 1;
	      for (m = 1; m; m <<= 1)
		if ((mask & m) != 0)
		  {
		    if ((src & k) != 0)
		      res |= m;
		    k <<= 1;
		  }
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;
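	/* PDEP deposits the low bits of the source into the set bit
	   positions of the mask, e.g. pdep (0b101, 0b11010) == 0b10010:
	   source bits 1, 0, 1 land in mask positions 1, 3 and 4.  */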
	case IX86_BUILTIN_PEXT32:
	case IX86_BUILTIN_PEXT64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
	    {
	      unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
	      unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned HOST_WIDE_INT m, k = 1;
	      for (m = 1; m; m <<= 1)
		if ((mask & m) != 0)
		  {
		    if ((src & m) != 0)
		      res |= k;
		    k <<= 1;
		  }
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;
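	/* PEXT is the inverse operation: it compresses the source bits
	   selected by the mask into the low bits of the result, so
	   pext (0b10010, 0b11010) == 0b101.  */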
	case IX86_BUILTIN_MOVMSKPS:
	case IX86_BUILTIN_PMOVMSKB:
	case IX86_BUILTIN_MOVMSKPD:
	case IX86_BUILTIN_PMOVMSKB128:
	case IX86_BUILTIN_MOVMSKPD256:
	case IX86_BUILTIN_MOVMSKPS256:
	case IX86_BUILTIN_PMOVMSKB256:
	  gcc_assert (n_args == 1);
	  if (TREE_CODE (args[0]) == VECTOR_CST)
	    {
	      HOST_WIDE_INT res = 0;
	      for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
		{
		  tree e = VECTOR_CST_ELT (args[0], i);
		  if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
		    {
		      if (wi::neg_p (wi::to_wide (e)))
			res |= HOST_WIDE_INT_1 << i;
		    }
		  else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
		    {
		      if (TREE_REAL_CST (e).sign)
			res |= HOST_WIDE_INT_1 << i;
		    }
		  else
		    return NULL_TREE;
		}
	      return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;
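	/* The movmsk family collects the sign bit of each element into the
	   low bits of a scalar, e.g. movmskps on { -1.0f, 2.0f, -3.0f, 4.0f }
	   yields 0b0101.  */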
	case IX86_BUILTIN_PSLLD:
	case IX86_BUILTIN_PSLLD128:
	case IX86_BUILTIN_PSLLD128_MASK:
	case IX86_BUILTIN_PSLLD256:
	case IX86_BUILTIN_PSLLD256_MASK:
	case IX86_BUILTIN_PSLLD512:
	case IX86_BUILTIN_PSLLDI:
	case IX86_BUILTIN_PSLLDI128:
	case IX86_BUILTIN_PSLLDI128_MASK:
	case IX86_BUILTIN_PSLLDI256:
	case IX86_BUILTIN_PSLLDI256_MASK:
	case IX86_BUILTIN_PSLLDI512:
	case IX86_BUILTIN_PSLLQ:
	case IX86_BUILTIN_PSLLQ128:
	case IX86_BUILTIN_PSLLQ128_MASK:
	case IX86_BUILTIN_PSLLQ256:
	case IX86_BUILTIN_PSLLQ256_MASK:
	case IX86_BUILTIN_PSLLQ512:
	case IX86_BUILTIN_PSLLQI:
	case IX86_BUILTIN_PSLLQI128:
	case IX86_BUILTIN_PSLLQI128_MASK:
	case IX86_BUILTIN_PSLLQI256:
	case IX86_BUILTIN_PSLLQI256_MASK:
	case IX86_BUILTIN_PSLLQI512:
	case IX86_BUILTIN_PSLLW:
	case IX86_BUILTIN_PSLLW128:
	case IX86_BUILTIN_PSLLW128_MASK:
	case IX86_BUILTIN_PSLLW256:
	case IX86_BUILTIN_PSLLW256_MASK:
	case IX86_BUILTIN_PSLLW512_MASK:
	case IX86_BUILTIN_PSLLWI:
	case IX86_BUILTIN_PSLLWI128:
	case IX86_BUILTIN_PSLLWI128_MASK:
	case IX86_BUILTIN_PSLLWI256:
	case IX86_BUILTIN_PSLLWI256_MASK:
	case IX86_BUILTIN_PSLLWI512_MASK:
	  rcode = ASHIFT;
	  is_vshift = false;
	  goto do_shift;

	case IX86_BUILTIN_PSRAD:
	case IX86_BUILTIN_PSRAD128:
	case IX86_BUILTIN_PSRAD128_MASK:
	case IX86_BUILTIN_PSRAD256:
	case IX86_BUILTIN_PSRAD256_MASK:
	case IX86_BUILTIN_PSRAD512:
	case IX86_BUILTIN_PSRADI:
	case IX86_BUILTIN_PSRADI128:
	case IX86_BUILTIN_PSRADI128_MASK:
	case IX86_BUILTIN_PSRADI256:
	case IX86_BUILTIN_PSRADI256_MASK:
	case IX86_BUILTIN_PSRADI512:
	case IX86_BUILTIN_PSRAQ128_MASK:
	case IX86_BUILTIN_PSRAQ256_MASK:
	case IX86_BUILTIN_PSRAQ512:
	case IX86_BUILTIN_PSRAQI128_MASK:
	case IX86_BUILTIN_PSRAQI256_MASK:
	case IX86_BUILTIN_PSRAQI512:
	case IX86_BUILTIN_PSRAW:
	case IX86_BUILTIN_PSRAW128:
	case IX86_BUILTIN_PSRAW128_MASK:
	case IX86_BUILTIN_PSRAW256:
	case IX86_BUILTIN_PSRAW256_MASK:
	case IX86_BUILTIN_PSRAW512:
	case IX86_BUILTIN_PSRAWI:
	case IX86_BUILTIN_PSRAWI128:
	case IX86_BUILTIN_PSRAWI128_MASK:
	case IX86_BUILTIN_PSRAWI256:
	case IX86_BUILTIN_PSRAWI256_MASK:
	case IX86_BUILTIN_PSRAWI512:
	  rcode = ASHIFTRT;
	  is_vshift = false;
	  goto do_shift;

	case IX86_BUILTIN_PSRLD:
	case IX86_BUILTIN_PSRLD128:
	case IX86_BUILTIN_PSRLD128_MASK:
	case IX86_BUILTIN_PSRLD256:
	case IX86_BUILTIN_PSRLD256_MASK:
	case IX86_BUILTIN_PSRLD512:
	case IX86_BUILTIN_PSRLDI:
	case IX86_BUILTIN_PSRLDI128:
	case IX86_BUILTIN_PSRLDI128_MASK:
	case IX86_BUILTIN_PSRLDI256:
	case IX86_BUILTIN_PSRLDI256_MASK:
	case IX86_BUILTIN_PSRLDI512:
	case IX86_BUILTIN_PSRLQ:
	case IX86_BUILTIN_PSRLQ128:
	case IX86_BUILTIN_PSRLQ128_MASK:
	case IX86_BUILTIN_PSRLQ256:
	case IX86_BUILTIN_PSRLQ256_MASK:
	case IX86_BUILTIN_PSRLQ512:
	case IX86_BUILTIN_PSRLQI:
	case IX86_BUILTIN_PSRLQI128:
	case IX86_BUILTIN_PSRLQI128_MASK:
	case IX86_BUILTIN_PSRLQI256:
	case IX86_BUILTIN_PSRLQI256_MASK:
	case IX86_BUILTIN_PSRLQI512:
	case IX86_BUILTIN_PSRLW:
	case IX86_BUILTIN_PSRLW128:
	case IX86_BUILTIN_PSRLW128_MASK:
	case IX86_BUILTIN_PSRLW256:
	case IX86_BUILTIN_PSRLW256_MASK:
	case IX86_BUILTIN_PSRLW512:
	case IX86_BUILTIN_PSRLWI:
	case IX86_BUILTIN_PSRLWI128:
	case IX86_BUILTIN_PSRLWI128_MASK:
	case IX86_BUILTIN_PSRLWI256:
	case IX86_BUILTIN_PSRLWI256_MASK:
	case IX86_BUILTIN_PSRLWI512:
	  rcode = LSHIFTRT;
	  is_vshift = false;
	  goto do_shift;

	case IX86_BUILTIN_PSLLVV16HI:
	case IX86_BUILTIN_PSLLVV16SI:
	case IX86_BUILTIN_PSLLVV2DI:
	case IX86_BUILTIN_PSLLVV2DI_MASK:
	case IX86_BUILTIN_PSLLVV32HI:
	case IX86_BUILTIN_PSLLVV4DI:
	case IX86_BUILTIN_PSLLVV4DI_MASK:
	case IX86_BUILTIN_PSLLVV4SI:
	case IX86_BUILTIN_PSLLVV4SI_MASK:
	case IX86_BUILTIN_PSLLVV8DI:
	case IX86_BUILTIN_PSLLVV8HI:
	case IX86_BUILTIN_PSLLVV8SI:
	case IX86_BUILTIN_PSLLVV8SI_MASK:
	  rcode = ASHIFT;
	  is_vshift = true;
	  goto do_shift;

	case IX86_BUILTIN_PSRAVQ128:
	case IX86_BUILTIN_PSRAVQ256:
	case IX86_BUILTIN_PSRAVV16HI:
	case IX86_BUILTIN_PSRAVV16SI:
	case IX86_BUILTIN_PSRAVV32HI:
	case IX86_BUILTIN_PSRAVV4SI:
	case IX86_BUILTIN_PSRAVV4SI_MASK:
	case IX86_BUILTIN_PSRAVV8DI:
	case IX86_BUILTIN_PSRAVV8HI:
	case IX86_BUILTIN_PSRAVV8SI:
	case IX86_BUILTIN_PSRAVV8SI_MASK:
	  rcode = ASHIFTRT;
	  is_vshift = true;
	  goto do_shift;

	case IX86_BUILTIN_PSRLVV16HI:
	case IX86_BUILTIN_PSRLVV16SI:
	case IX86_BUILTIN_PSRLVV2DI:
	case IX86_BUILTIN_PSRLVV2DI_MASK:
	case IX86_BUILTIN_PSRLVV32HI:
	case IX86_BUILTIN_PSRLVV4DI:
	case IX86_BUILTIN_PSRLVV4DI_MASK:
	case IX86_BUILTIN_PSRLVV4SI:
	case IX86_BUILTIN_PSRLVV4SI_MASK:
	case IX86_BUILTIN_PSRLVV8DI:
	case IX86_BUILTIN_PSRLVV8HI:
	case IX86_BUILTIN_PSRLVV8SI:
	case IX86_BUILTIN_PSRLVV8SI_MASK:
	  rcode = LSHIFTRT;
	  is_vshift = true;
	  goto do_shift;
	do_shift:
	  gcc_assert (n_args >= 2);
	  if (TREE_CODE (args[0]) != VECTOR_CST)
	    break;
	  mask = HOST_WIDE_INT_M1U;
	  if (n_args > 2)
	    {
	      /* This is masked shift.  */
	      if (!tree_fits_uhwi_p (args[n_args - 1])
		  || TREE_SIDE_EFFECTS (args[n_args - 2]))
		break;
	      mask = tree_to_uhwi (args[n_args - 1]);
	      unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
	      mask |= HOST_WIDE_INT_M1U << elems;
	      if (mask != HOST_WIDE_INT_M1U
		  && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
		break;
	      if (mask == (HOST_WIDE_INT_M1U << elems))
		return args[n_args - 2];
	    }
	  if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
	    break;
	  if (tree tem = (is_vshift ? integer_one_node
			  : ix86_vector_shift_count (args[1])))
	    {
	      unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
	      unsigned HOST_WIDE_INT prec
		= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
	      if (count == 0 && mask == HOST_WIDE_INT_M1U)
		return args[0];
	      if (count >= prec)
		{
		  if (rcode == ASHIFTRT)
		    count = prec - 1;
		  else if (mask == HOST_WIDE_INT_M1U)
		    return build_zero_cst (TREE_TYPE (args[0]));
		}
	      tree countt = NULL_TREE;
	      if (!is_vshift)
		{
		  if (count >= prec)
		    countt = integer_zero_node;
		  else
		    countt = build_int_cst (integer_type_node, count);
		}
	      tree_vector_builder builder;
	      if (mask != HOST_WIDE_INT_M1U || is_vshift)
		builder.new_vector (TREE_TYPE (args[0]),
				    TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
				    1);
	      else
		builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
					     false);
	      unsigned int cnt = builder.encoded_nelts ();
	      for (unsigned int i = 0; i < cnt; ++i)
		{
		  tree elt = VECTOR_CST_ELT (args[0], i);
		  if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
		    return NULL_TREE;
		  tree type = TREE_TYPE (elt);
		  if (rcode == LSHIFTRT)
		    elt = fold_convert (unsigned_type_for (type), elt);
		  if (is_vshift)
		    {
		      countt = VECTOR_CST_ELT (args[1], i);
		      if (TREE_CODE (countt) != INTEGER_CST
			  || TREE_OVERFLOW (countt))
			return NULL_TREE;
		      if (wi::neg_p (wi::to_wide (countt))
			  || wi::to_widest (countt) >= prec)
			{
			  if (rcode == ASHIFTRT)
			    countt = build_int_cst (TREE_TYPE (countt),
						    prec - 1);
			  else
			    {
			      elt = build_zero_cst (TREE_TYPE (elt));
			      countt = build_zero_cst (TREE_TYPE (countt));
			    }
			}
		    }
		  else if (count >= prec)
		    elt = build_zero_cst (TREE_TYPE (elt));
		  elt = const_binop (rcode == ASHIFT
				     ? LSHIFT_EXPR : RSHIFT_EXPR,
				     TREE_TYPE (elt), elt, countt);
		  if (!elt || TREE_CODE (elt) != INTEGER_CST)
		    return NULL_TREE;
		  if (rcode == LSHIFTRT)
		    elt = fold_convert (type, elt);
		  if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
		    {
		      elt = VECTOR_CST_ELT (args[n_args - 2], i);
		      if (TREE_CODE (elt) != INTEGER_CST
			  || TREE_OVERFLOW (elt))
			return NULL_TREE;
		    }
		  builder.quick_push (elt);
		}
	      return builder.build ();
	    }
	  break;

	default:
	  break;
	}
    }
#ifdef SUBTARGET_FOLD_BUILTIN
  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
#endif

  return NULL_TREE;
}
/* Fold a MD builtin (use ix86_fold_builtin for folding into
   constant) in GIMPLE.  */

bool
ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi), *g;
  gimple_seq stmts = NULL;
  tree fndecl = gimple_call_fndecl (stmt);
  gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
  int n_args = gimple_call_num_args (stmt);
  enum ix86_builtins fn_code
    = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
  tree decl = NULL_TREE;
  tree arg0, arg1, arg2;
  enum rtx_code rcode;
  enum tree_code tcode;
  unsigned HOST_WIDE_INT count;
  bool is_vshift;
  unsigned HOST_WIDE_INT elems;
  location_t loc;

  /* Don't fold when there's isa mismatch.  */
  if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL))
    return false;

  switch (fn_code)
    {
:
19084 decl
= builtin_decl_implicit (BUILT_IN_CTZ
);
19085 goto fold_tzcnt_lzcnt
;
19087 case IX86_BUILTIN_TZCNT64
:
19088 decl
= builtin_decl_implicit (BUILT_IN_CTZLL
);
19089 goto fold_tzcnt_lzcnt
;
19091 case IX86_BUILTIN_LZCNT32
:
19092 decl
= builtin_decl_implicit (BUILT_IN_CLZ
);
19093 goto fold_tzcnt_lzcnt
;
19095 case IX86_BUILTIN_LZCNT64
:
19096 decl
= builtin_decl_implicit (BUILT_IN_CLZLL
);
19097 goto fold_tzcnt_lzcnt
;
19100 gcc_assert (n_args
== 1);
19101 arg0
= gimple_call_arg (stmt
, 0);
19102 if (TREE_CODE (arg0
) == SSA_NAME
&& decl
&& gimple_call_lhs (stmt
))
19104 int prec
= TYPE_PRECISION (TREE_TYPE (arg0
));
19105 /* If arg0 is provably non-zero, optimize into generic
19106 __builtin_c[tl]z{,ll} function the middle-end handles
19108 if (!expr_not_equal_to (arg0
, wi::zero (prec
)))
19111 loc
= gimple_location (stmt
);
19112 g
= gimple_build_call (decl
, 1, arg0
);
19113 gimple_set_location (g
, loc
);
19114 tree lhs
= make_ssa_name (integer_type_node
);
19115 gimple_call_set_lhs (g
, lhs
);
19116 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
19117 g
= gimple_build_assign (gimple_call_lhs (stmt
), NOP_EXPR
, lhs
);
19118 gimple_set_location (g
, loc
);
19119 gsi_replace (gsi
, g
, false);
    case IX86_BUILTIN_BZHI32:
    case IX86_BUILTIN_BZHI64:
      gcc_assert (n_args == 2);
      arg1 = gimple_call_arg (stmt, 1);
      if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
	{
	  unsigned int idx = tree_to_uhwi (arg1) & 0xff;
	  arg0 = gimple_call_arg (stmt, 0);
	  if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
	    break;
	  loc = gimple_location (stmt);
	  g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
	  gimple_set_location (g, loc);
	  gsi_replace (gsi, g, false);
	  return true;
	}
      break;
    case IX86_BUILTIN_PDEP32:
    case IX86_BUILTIN_PDEP64:
    case IX86_BUILTIN_PEXT32:
    case IX86_BUILTIN_PEXT64:
      gcc_assert (n_args == 2);
      arg1 = gimple_call_arg (stmt, 1);
      if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
	{
	  loc = gimple_location (stmt);
	  arg0 = gimple_call_arg (stmt, 0);
	  g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
	  gimple_set_location (g, loc);
	  gsi_replace (gsi, g, false);
	  return true;
	}
      break;
    case IX86_BUILTIN_PBLENDVB256:
    case IX86_BUILTIN_BLENDVPS256:
    case IX86_BUILTIN_BLENDVPD256:
      /* pcmpeqb/d/q is under avx2; w/o avx2, it's veclowered
	 to scalar operations and not combined back.  */
      if (!TARGET_AVX2)
	break;

      /* FALLTHRU.  */
    case IX86_BUILTIN_BLENDVPD:
      /* blendvpd is under sse4.1 but pcmpgtq is under sse4.2;
	 w/o sse4.2, it's veclowered to scalar operations and
	 not combined back.  */
      if (!TARGET_SSE4_2)
	break;
      /* FALLTHRU.  */
    case IX86_BUILTIN_PBLENDVB128:
    case IX86_BUILTIN_BLENDVPS:
      gcc_assert (n_args == 3);
      arg0 = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);
      arg2 = gimple_call_arg (stmt, 2);
      if (gimple_call_lhs (stmt))
	{
	  loc = gimple_location (stmt);
	  tree type = TREE_TYPE (arg2);
	  if (VECTOR_FLOAT_TYPE_P (type))
	    {
	      tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
		? intSI_type_node : intDI_type_node;
	      type = get_same_sized_vectype (itype, type);
	    }
	  else
	    type = signed_type_for (type);
	  arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
	  tree zero_vec = build_zero_cst (type);
	  tree cmp_type = truth_type_for (type);
	  tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
	  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
	  g = gimple_build_assign (gimple_call_lhs (stmt),
				   VEC_COND_EXPR, cmp,
				   arg1, arg0);
	  gimple_set_location (g, loc);
	  gsi_replace (gsi, g, false);
	}
      else
	gsi_replace (gsi, gimple_build_nop (), false);
      return true;
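    /* I.e. blendv (a, b, c) becomes the generic c < 0 ? b : a on a signed
       element type, selecting B where the mask element's sign bit is set,
       which matches the hardware semantics of the instruction.  */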
    case IX86_BUILTIN_PCMPEQB128:
    case IX86_BUILTIN_PCMPEQW128:
    case IX86_BUILTIN_PCMPEQD128:
    case IX86_BUILTIN_PCMPEQQ:
    case IX86_BUILTIN_PCMPEQB256:
    case IX86_BUILTIN_PCMPEQW256:
    case IX86_BUILTIN_PCMPEQD256:
    case IX86_BUILTIN_PCMPEQQ256:
      tcode = EQ_EXPR;
      goto do_cmp;

    case IX86_BUILTIN_PCMPGTB128:
    case IX86_BUILTIN_PCMPGTW128:
    case IX86_BUILTIN_PCMPGTD128:
    case IX86_BUILTIN_PCMPGTQ:
    case IX86_BUILTIN_PCMPGTB256:
    case IX86_BUILTIN_PCMPGTW256:
    case IX86_BUILTIN_PCMPGTD256:
    case IX86_BUILTIN_PCMPGTQ256:
      tcode = GT_EXPR;

    do_cmp:
      gcc_assert (n_args == 2);
      arg0 = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);
      if (gimple_call_lhs (stmt))
	{
	  loc = gimple_location (stmt);
	  tree type = TREE_TYPE (arg0);
	  tree zero_vec = build_zero_cst (type);
	  tree minus_one_vec = build_minus_one_cst (type);
	  tree cmp_type = truth_type_for (type);
	  tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
	  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
	  g = gimple_build_assign (gimple_call_lhs (stmt),
				   VEC_COND_EXPR, cmp,
				   minus_one_vec, zero_vec);
	  gimple_set_location (g, loc);
	  gsi_replace (gsi, g, false);
	}
      else
	gsi_replace (gsi, gimple_build_nop (), false);
      return true;
    case IX86_BUILTIN_PSLLD:
    case IX86_BUILTIN_PSLLD128:
    case IX86_BUILTIN_PSLLD128_MASK:
    case IX86_BUILTIN_PSLLD256:
    case IX86_BUILTIN_PSLLD256_MASK:
    case IX86_BUILTIN_PSLLD512:
    case IX86_BUILTIN_PSLLDI:
    case IX86_BUILTIN_PSLLDI128:
    case IX86_BUILTIN_PSLLDI128_MASK:
    case IX86_BUILTIN_PSLLDI256:
    case IX86_BUILTIN_PSLLDI256_MASK:
    case IX86_BUILTIN_PSLLDI512:
    case IX86_BUILTIN_PSLLQ:
    case IX86_BUILTIN_PSLLQ128:
    case IX86_BUILTIN_PSLLQ128_MASK:
    case IX86_BUILTIN_PSLLQ256:
    case IX86_BUILTIN_PSLLQ256_MASK:
    case IX86_BUILTIN_PSLLQ512:
    case IX86_BUILTIN_PSLLQI:
    case IX86_BUILTIN_PSLLQI128:
    case IX86_BUILTIN_PSLLQI128_MASK:
    case IX86_BUILTIN_PSLLQI256:
    case IX86_BUILTIN_PSLLQI256_MASK:
    case IX86_BUILTIN_PSLLQI512:
    case IX86_BUILTIN_PSLLW:
    case IX86_BUILTIN_PSLLW128:
    case IX86_BUILTIN_PSLLW128_MASK:
    case IX86_BUILTIN_PSLLW256:
    case IX86_BUILTIN_PSLLW256_MASK:
    case IX86_BUILTIN_PSLLW512_MASK:
    case IX86_BUILTIN_PSLLWI:
    case IX86_BUILTIN_PSLLWI128:
    case IX86_BUILTIN_PSLLWI128_MASK:
    case IX86_BUILTIN_PSLLWI256:
    case IX86_BUILTIN_PSLLWI256_MASK:
    case IX86_BUILTIN_PSLLWI512_MASK:
      rcode = ASHIFT;
      is_vshift = false;
      goto do_shift;

    case IX86_BUILTIN_PSRAD:
    case IX86_BUILTIN_PSRAD128:
    case IX86_BUILTIN_PSRAD128_MASK:
    case IX86_BUILTIN_PSRAD256:
    case IX86_BUILTIN_PSRAD256_MASK:
    case IX86_BUILTIN_PSRAD512:
    case IX86_BUILTIN_PSRADI:
    case IX86_BUILTIN_PSRADI128:
    case IX86_BUILTIN_PSRADI128_MASK:
    case IX86_BUILTIN_PSRADI256:
    case IX86_BUILTIN_PSRADI256_MASK:
    case IX86_BUILTIN_PSRADI512:
    case IX86_BUILTIN_PSRAQ128_MASK:
    case IX86_BUILTIN_PSRAQ256_MASK:
    case IX86_BUILTIN_PSRAQ512:
    case IX86_BUILTIN_PSRAQI128_MASK:
    case IX86_BUILTIN_PSRAQI256_MASK:
    case IX86_BUILTIN_PSRAQI512:
    case IX86_BUILTIN_PSRAW:
    case IX86_BUILTIN_PSRAW128:
    case IX86_BUILTIN_PSRAW128_MASK:
    case IX86_BUILTIN_PSRAW256:
    case IX86_BUILTIN_PSRAW256_MASK:
    case IX86_BUILTIN_PSRAW512:
    case IX86_BUILTIN_PSRAWI:
    case IX86_BUILTIN_PSRAWI128:
    case IX86_BUILTIN_PSRAWI128_MASK:
    case IX86_BUILTIN_PSRAWI256:
    case IX86_BUILTIN_PSRAWI256_MASK:
    case IX86_BUILTIN_PSRAWI512:
      rcode = ASHIFTRT;
      is_vshift = false;
      goto do_shift;

    case IX86_BUILTIN_PSRLD:
    case IX86_BUILTIN_PSRLD128:
    case IX86_BUILTIN_PSRLD128_MASK:
    case IX86_BUILTIN_PSRLD256:
    case IX86_BUILTIN_PSRLD256_MASK:
    case IX86_BUILTIN_PSRLD512:
    case IX86_BUILTIN_PSRLDI:
    case IX86_BUILTIN_PSRLDI128:
    case IX86_BUILTIN_PSRLDI128_MASK:
    case IX86_BUILTIN_PSRLDI256:
    case IX86_BUILTIN_PSRLDI256_MASK:
    case IX86_BUILTIN_PSRLDI512:
    case IX86_BUILTIN_PSRLQ:
    case IX86_BUILTIN_PSRLQ128:
    case IX86_BUILTIN_PSRLQ128_MASK:
    case IX86_BUILTIN_PSRLQ256:
    case IX86_BUILTIN_PSRLQ256_MASK:
    case IX86_BUILTIN_PSRLQ512:
    case IX86_BUILTIN_PSRLQI:
    case IX86_BUILTIN_PSRLQI128:
    case IX86_BUILTIN_PSRLQI128_MASK:
    case IX86_BUILTIN_PSRLQI256:
    case IX86_BUILTIN_PSRLQI256_MASK:
    case IX86_BUILTIN_PSRLQI512:
    case IX86_BUILTIN_PSRLW:
    case IX86_BUILTIN_PSRLW128:
    case IX86_BUILTIN_PSRLW128_MASK:
    case IX86_BUILTIN_PSRLW256:
    case IX86_BUILTIN_PSRLW256_MASK:
    case IX86_BUILTIN_PSRLW512:
    case IX86_BUILTIN_PSRLWI:
    case IX86_BUILTIN_PSRLWI128:
    case IX86_BUILTIN_PSRLWI128_MASK:
    case IX86_BUILTIN_PSRLWI256:
    case IX86_BUILTIN_PSRLWI256_MASK:
    case IX86_BUILTIN_PSRLWI512:
      rcode = LSHIFTRT;
      is_vshift = false;
      goto do_shift;

    case IX86_BUILTIN_PSLLVV16HI:
    case IX86_BUILTIN_PSLLVV16SI:
    case IX86_BUILTIN_PSLLVV2DI:
    case IX86_BUILTIN_PSLLVV2DI_MASK:
    case IX86_BUILTIN_PSLLVV32HI:
    case IX86_BUILTIN_PSLLVV4DI:
    case IX86_BUILTIN_PSLLVV4DI_MASK:
    case IX86_BUILTIN_PSLLVV4SI:
    case IX86_BUILTIN_PSLLVV4SI_MASK:
    case IX86_BUILTIN_PSLLVV8DI:
    case IX86_BUILTIN_PSLLVV8HI:
    case IX86_BUILTIN_PSLLVV8SI:
    case IX86_BUILTIN_PSLLVV8SI_MASK:
      rcode = ASHIFT;
      is_vshift = true;
      goto do_shift;

    case IX86_BUILTIN_PSRAVQ128:
    case IX86_BUILTIN_PSRAVQ256:
    case IX86_BUILTIN_PSRAVV16HI:
    case IX86_BUILTIN_PSRAVV16SI:
    case IX86_BUILTIN_PSRAVV32HI:
    case IX86_BUILTIN_PSRAVV4SI:
    case IX86_BUILTIN_PSRAVV4SI_MASK:
    case IX86_BUILTIN_PSRAVV8DI:
    case IX86_BUILTIN_PSRAVV8HI:
    case IX86_BUILTIN_PSRAVV8SI:
    case IX86_BUILTIN_PSRAVV8SI_MASK:
      rcode = ASHIFTRT;
      is_vshift = true;
      goto do_shift;

    case IX86_BUILTIN_PSRLVV16HI:
    case IX86_BUILTIN_PSRLVV16SI:
    case IX86_BUILTIN_PSRLVV2DI:
    case IX86_BUILTIN_PSRLVV2DI_MASK:
    case IX86_BUILTIN_PSRLVV32HI:
    case IX86_BUILTIN_PSRLVV4DI:
    case IX86_BUILTIN_PSRLVV4DI_MASK:
    case IX86_BUILTIN_PSRLVV4SI:
    case IX86_BUILTIN_PSRLVV4SI_MASK:
    case IX86_BUILTIN_PSRLVV8DI:
    case IX86_BUILTIN_PSRLVV8HI:
    case IX86_BUILTIN_PSRLVV8SI:
    case IX86_BUILTIN_PSRLVV8SI_MASK:
      rcode = LSHIFTRT;
      is_vshift = true;
      goto do_shift;

    do_shift:
      gcc_assert (n_args >= 2);
      if (!gimple_call_lhs (stmt))
	{
	  gsi_replace (gsi, gimple_build_nop (), false);
	  return true;
	}
      arg0 = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);
      elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
      /* For masked shift, only optimize if the mask is all ones.  */
      if (n_args > 2
	  && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
	break;
      if (is_vshift)
	{
	  if (TREE_CODE (arg1) != VECTOR_CST)
	    break;
	  count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
	  if (integer_zerop (arg1))
	    count = 0;
	  else if (rcode == ASHIFTRT)
	    break;
	  else
	    for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
	      {
		tree elt = VECTOR_CST_ELT (arg1, i);
		if (!wi::neg_p (wi::to_wide (elt))
		    && wi::to_widest (elt) < count)
		  return false;
	      }
	}
      else
	{
	  arg1 = ix86_vector_shift_count (arg1);
	  if (!arg1)
	    break;
	  count = tree_to_uhwi (arg1);
	}
      if (count == 0)
	{
	  /* Just return the first argument for shift by 0.  */
	  loc = gimple_location (stmt);
	  g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
	  gimple_set_location (g, loc);
	  gsi_replace (gsi, g, false);
	  return true;
	}
      if (rcode != ASHIFTRT
	  && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
	{
	  /* For shift counts equal or greater than precision, except for
	     arithmetic right shift the result is zero.  */
	  loc = gimple_location (stmt);
	  g = gimple_build_assign (gimple_call_lhs (stmt),
				   build_zero_cst (TREE_TYPE (arg0)));
	  gimple_set_location (g, loc);
	  gsi_replace (gsi, g, false);
	  return true;
	}
      break;
    case IX86_BUILTIN_SHUFPD512:
    case IX86_BUILTIN_SHUFPS512:
    case IX86_BUILTIN_SHUFPD:
    case IX86_BUILTIN_SHUFPD256:
    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPS256:
      arg0 = gimple_call_arg (stmt, 0);
      elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
      /* This is masked shuffle.  Only optimize if the mask is all ones.  */
      if (n_args > 3
	  && !ix86_masked_all_ones (elems,
				    gimple_call_arg (stmt, n_args - 1)))
	break;
      arg2 = gimple_call_arg (stmt, 2);
      if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (stmt))
	{
	  unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2);
	  /* Check valid imm, refer to gcc.target/i386/testimm-10.c.  */
	  if (shuffle_mask > 255)
	    return false;

	  machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
	  loc = gimple_location (stmt);
	  tree itype = (imode == E_DFmode
			? long_long_integer_type_node : integer_type_node);
	  tree vtype = build_vector_type (itype, elems);
	  tree_vector_builder elts (vtype, elems, 1);

	  /* Transform integer shuffle_mask to vector perm_mask which
	     is used by vec_perm_expr, refer to shuflp[sd]256/512 in sse.md.  */
	  for (unsigned i = 0; i != elems; i++)
	    {
	      unsigned sel_idx;
	      /* Imm[1:0](if VL > 128, then use Imm[3:2],Imm[5:4],Imm[7:6])
		 provide 2 select controls for each element of the
		 destination.  */
	      if (imode == E_DFmode)
		sel_idx = (i & 1) * elems + (i & ~1)
			  + ((shuffle_mask >> i) & 1);
	      else
		{
		  /* Imm[7:0](if VL > 128, also use Imm[7:0]) provide 4 select
		     controls for each element of the destination.  */
		  unsigned j = i % 4;
		  sel_idx = ((i >> 1) & 1) * elems + (i & ~3)
			    + ((shuffle_mask >> 2 * j) & 3);
		}
	      elts.quick_push (build_int_cst (itype, sel_idx));
	    }

	  tree perm_mask = elts.build ();
	  arg1 = gimple_call_arg (stmt, 1);
	  g = gimple_build_assign (gimple_call_lhs (stmt),
				   VEC_PERM_EXPR,
				   arg0, arg1, perm_mask);
	  gimple_set_location (g, loc);
	  gsi_replace (gsi, g, false);
	  return true;
	}
      // Do not error yet, the constant could be propagated later?
      break;
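    /* As a worked example of the formula above: for shufpd (V2DF) with
       shuffle_mask 0b01, the builder produces the permutation { 1, 2 },
       i.e. destination element 0 takes element 1 of ARG0 and destination
       element 1 takes element 0 of ARG1, matching the instruction.  */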
    case IX86_BUILTIN_PABSB:
    case IX86_BUILTIN_PABSW:
    case IX86_BUILTIN_PABSD:
      /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE.  */
      if (!TARGET_MMX_WITH_SSE)
	break;
      /* FALLTHRU.  */
    case IX86_BUILTIN_PABSB128:
    case IX86_BUILTIN_PABSB256:
    case IX86_BUILTIN_PABSB512:
    case IX86_BUILTIN_PABSW128:
    case IX86_BUILTIN_PABSW256:
    case IX86_BUILTIN_PABSW512:
    case IX86_BUILTIN_PABSD128:
    case IX86_BUILTIN_PABSD256:
    case IX86_BUILTIN_PABSD512:
    case IX86_BUILTIN_PABSQ128:
    case IX86_BUILTIN_PABSQ256:
    case IX86_BUILTIN_PABSQ512:
    case IX86_BUILTIN_PABSB128_MASK:
    case IX86_BUILTIN_PABSB256_MASK:
    case IX86_BUILTIN_PABSW128_MASK:
    case IX86_BUILTIN_PABSW256_MASK:
    case IX86_BUILTIN_PABSD128_MASK:
    case IX86_BUILTIN_PABSD256_MASK:
      gcc_assert (n_args >= 1);
      if (!gimple_call_lhs (stmt))
	{
	  gsi_replace (gsi, gimple_build_nop (), false);
	  return true;
	}
      arg0 = gimple_call_arg (stmt, 0);
      elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
      /* For masked ABS, only optimize if the mask is all ones.  */
      if (n_args > 1
	  && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
	break;
      {
	tree utype, ures, vce;
	utype = unsigned_type_for (TREE_TYPE (arg0));
	/* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
	   instead of ABS_EXPR to handle overflow case (TYPE_MIN).  */
	ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
	loc = gimple_location (stmt);
	vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
	g = gimple_build_assign (gimple_call_lhs (stmt),
				 VIEW_CONVERT_EXPR, vce);
	gsi_replace (gsi, g, false);
      }
      return true;

    default:
      break;
    }

  return false;
}
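/* ABSU_EXPR rather than ABS_EXPR matters for the most negative value:
   e.g. pabsd on INT_MIN yields 0x80000000, which is well defined in the
   unsigned type but would overflow a signed ABS_EXPR.  */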
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  */

tree
ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    CASE_CFN_EXP:
    CASE_CFN_LOG:
    CASE_CFN_LOG10:
    CASE_CFN_POW:
    CASE_CFN_TANH:
    CASE_CFN_TAN:
    CASE_CFN_ATAN:
    CASE_CFN_ATAN2:
    CASE_CFN_ATANH:
    CASE_CFN_CBRT:
    CASE_CFN_SINH:
    CASE_CFN_SIN:
    CASE_CFN_ASINH:
    CASE_CFN_ASIN:
    CASE_CFN_COSH:
    CASE_CFN_COS:
    CASE_CFN_ACOSH:
    CASE_CFN_ACOS:
      if ((el_mode != DFmode || n != 2)
	  && (el_mode != SFmode || n != 4))
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  tree fndecl = mathfn_built_in (el_mode == DFmode
				 ? double_type_node : float_type_node, fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));

  if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (el_mode == SFmode)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase.  */
  name[4] &= ~0x20;

  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.  */

tree
ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
{
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    CASE_CFN_SIN:
    CASE_CFN_COS:
    CASE_CFN_EXP:
    CASE_CFN_LOG:
    CASE_CFN_LOG2:
    CASE_CFN_LOG10:
      if (el_mode == DFmode && n == 2)
	{
	  name[4] = 'd';
	  name[5] = '2';
	}
      else if (el_mode == SFmode && n == 4)
	{
	  name[4] = 's';
	  name[5] = '4';
	}
      else
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  tree fndecl = mathfn_built_in (el_mode == DFmode
				 ? double_type_node : float_type_node, fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  sprintf (name + 7, "%s", bname+10);

  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
/* Returns a decl of a function that implements scatter store with
   register type VECTYPE and index type INDEX_TYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_scatter (const_tree vectype,
				const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;
  const machine_mode mode = TYPE_MODE (TREE_TYPE (vectype));

  if (!TARGET_AVX512F)
    return NULL_TREE;

  if (!TARGET_EVEX512 && GET_MODE_SIZE (mode) == 64)
    return NULL_TREE;

  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
      ? !TARGET_USE_SCATTER_2PARTS
      : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
	 ? !TARGET_USE_SCATTER_4PARTS
	 : !TARGET_USE_SCATTER_8PARTS))
    return NULL_TREE;

  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
	  && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*scatter* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  /* Scale can be 1, 2, 4 or 8.  */
  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (vectype))
    {
    case E_V8DFmode:
      code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
      break;
    case E_V8DImode:
      code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
      break;
    case E_V16SFmode:
      code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
      break;
    case E_V16SImode:
      code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
      break;
    case E_V4DFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
      else
	return NULL_TREE;
      break;
    case E_V4DImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
      else
	return NULL_TREE;
      break;
    case E_V8SFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
      else
	return NULL_TREE;
      break;
    case E_V8SImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
      else
	return NULL_TREE;
      break;
    case E_V2DFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
      else
	return NULL_TREE;
      break;
    case E_V2DImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
      else
	return NULL_TREE;
      break;
    case E_V4SFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
      else
	return NULL_TREE;
      break;
    case E_V4SImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
      else
	return NULL_TREE;
      break;
    default:
      return NULL_TREE;
    }

  return get_ix86_builtin (code);
}
/* Return true if it is safe to use the rsqrt optabs to optimize
   1.0/sqrt.  */

static bool
use_rsqrt_p (machine_mode mode)
{
  return ((mode == HFmode
	   || (TARGET_SSE && TARGET_SSE_MATH))
	  && flag_finite_math_only
	  && !flag_trapping_math
	  && flag_unsafe_math_optimizations);
}
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[16] = {};  /* Silence -Wuninitialized warning.  */

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
	return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case E_V8DFmode:
      /* In the 512-bit DFmode case, we can only move elements within
	 a 128-bit lane.  First fill the second part of the mask,
	 then fallthru.  */
      for (i = 4; i < 6; ++i)
	{
	  if (ipar[i] < 4 || ipar[i] >= 6)
	    return 0;
	  mask |= (ipar[i] - 4) << i;
	}
      for (i = 6; i < 8; ++i)
	{
	  if (ipar[i] < 6)
	    return 0;
	  mask |= (ipar[i] - 6) << i;
	}
      /* FALLTHRU */

    case E_V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
	 a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
	{
	  if (ipar[i] >= 2)
	    return 0;
	  mask |= ipar[i] << i;
	}
      for (i = 2; i < 4; ++i)
	{
	  if (ipar[i] < 2)
	    return 0;
	  mask |= (ipar[i] - 2) << i;
	}
      break;

    case E_V16SFmode:
      /* In 512 bit SFmode case, permutation in the upper 256 bits
	 must mirror the permutation in the lower 256-bits.  */
      for (i = 0; i < 8; ++i)
	if (ipar[i] + 8 != ipar[i + 8])
	  return 0;
      /* FALLTHRU */

    case E_V8SFmode:
      /* In 256 bit SFmode case, we have full freedom of
	 movement within the low 128-bit lane, but the high 128-bit
	 lane must mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
	if (ipar[i] + 4 != ipar[i + 4])
	  return 0;
      nelt = 4;
      /* FALLTHRU */

    case E_V2DFmode:
    case E_V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
	 the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
	mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
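/* As an example of the mask construction above: a V4SF parallel
   (1 0 3 2) packs into the two-bit fields 0b10'11'00'01, i.e. imm8 0xb1,
   so the function returns 0xb2.  */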
/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vperm2f128_parallel (rtx par, machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
	return 0;
      ipar[i] = ei;
    }

  /* Validate that the halves of the permute are halves.  */
  for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
  for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;

  /* Reconstruct the mask.  */
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2];
      if (e % nelt2)
	return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
/* Return a mask of VPTERNLOG operands that do not affect output.  */

int
vpternlog_redundant_operand_mask (rtx pternlog_imm)
{
  int mask = 0;
  int imm8 = INTVAL (pternlog_imm);

  if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F))
    mask |= 1;
  if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
    mask |= 2;
  if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
    mask |= 4;

  return mask;
}
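/* Each test checks whether the 8-entry truth table encoded by imm8 is
   invariant when one input is flipped.  E.g. imm8 0xf0 (copy the first
   operand) passes the >> 2 and >> 1 tests, so operands 2 and 3 are
   redundant and the mask is 6; imm8 0xaa (copy the third operand)
   gives mask 3.  */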
/* Eliminate false dependencies on operands that do not affect output
   by substituting other operands of a VPTERNLOG.  */

void
substitute_vpternlog_operands (rtx *operands)
{
  int mask = vpternlog_redundant_operand_mask (operands[4]);

  if (mask & 1) /* The first operand is redundant.  */
    operands[1] = operands[2];

  if (mask & 2) /* The second operand is redundant.  */
    operands[2] = operands[1];

  if (mask & 4) /* The third operand is redundant.  */
    operands[3] = operands[1];
  else if (REG_P (operands[3]))
    {
      if (mask & 1)
	operands[1] = operands[3];
      if (mask & 2)
	operands[2] = operands[3];
    }
}
/* Return a register priority for hard reg REGNO.  */
static int
ix86_register_priority (int hard_regno)
{
  /* ebp and r13 as the base always wants a displacement, r12 as the
     base always wants an index.  So discourage their usage in an
     address.  */
  if (hard_regno == R12_REG || hard_regno == R13_REG)
    return 0;
  if (hard_regno == BP_REG)
    return 1;
  /* New x86-64 int registers result in bigger code size.  Discourage them.  */
  if (REX_INT_REGNO_P (hard_regno))
    return 2;
  if (REX2_INT_REGNO_P (hard_regno))
    return 2;
  /* New x86-64 SSE registers result in bigger code size.  Discourage them.  */
  if (REX_SSE_REGNO_P (hard_regno))
    return 2;
  if (EXT_REX_SSE_REGNO_P (hard_regno))
    return 1;
  /* Usage of AX register results in smaller code.  Prefer it.  */
  if (hard_regno == AX_REG)
    return 4;

  return 3;
}
/* Implement TARGET_PREFERRED_RELOAD_CLASS.

   Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX, SSE or MASK register.  This is because there are no MMX/SSE/MASK
     instructions to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass)
	  || MAYBE_SSE_CLASS_P (regclass)
	  || MAYBE_MASK_CLASS_P (regclass)))
    return NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (CONST_DOUBLE_P (x))
    {
      /* General regs can load everything.  */
      if (INTEGER_CLASS_P (regclass))
	return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (IS_STACK_MODE (mode)
	  && standard_80387_constant_p (x) > 0)
	{
	  /* Limit class to FP regs.  */
	  if (FLOAT_CLASS_P (regclass))
	    return FLOAT_REGS;
	}

      return NO_REGS;
    }

  /* Prefer SSE if we can use them for math.  Also allow integer regs
     when moves between register units are cheap.  */
  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
    {
      if (TARGET_INTER_UNIT_MOVES_FROM_VEC
	  && TARGET_INTER_UNIT_MOVES_TO_VEC
	  && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
	return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
      else
	return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS or ALL_MASK_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (Q_CLASS_P (regclass))
	return regclass;
      else if (reg_class_subset_p (Q_REGS, regclass))
	return Q_REGS;
      else if (MASK_CLASS_P (regclass))
	return regclass;
      else
	return NO_REGS;
    }

  return regclass;
}
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */
static reg_class_t
ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
{
  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  machine_mode mode = GET_MODE (x);
  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
    return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;

  if (IS_STACK_MODE (mode))
    return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;

  return regclass;
}
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
		       machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && INTEGER_CLASS_P (rclass)
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
		    ? CODE_FOR_reload_noff_load
		    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (mode == QImode
      && ((!TARGET_64BIT && !in_p
	   && INTEGER_CLASS_P (rclass)
	   && MAYBE_NON_Q_CLASS_P (rclass))
	  || (!TARGET_AVX512DQ
	      && MAYBE_MASK_CLASS_P (rclass))))
    {
      int regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
	return Q_REGS;

      return NO_REGS;
    }

  /* Require movement to gpr, and then store to memory.  */
  if ((mode == HFmode || mode == HImode || mode == V2QImode
       || mode == BFmode)
      && !TARGET_SSE4_1
      && SSE_CLASS_P (rclass)
      && !in_p && MEM_P (x))
    {
      sri->extra_cost = 1;
      return GENERAL_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
	  (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	  (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
				       (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	  (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
	   (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
     reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
     reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
     reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */

static bool
ix86_class_likely_spilled_p (reg_class_t rclass)
{
  switch (rclass)
    {
      case AREG:
      case DREG:
      case CREG:
      case BREG:
      case AD_REGS:
      case CLOBBERED_REGS:
      case Q_REGS:
      case SIREG:
      case DIREG:
      case SSE_FIRST_REG:
      case FP_TOP_REG:
      case FP_SECOND_REG:
	return true;

      default:
	break;
    }

  return false;
}
/* Return true if a set of DST by the expression SRC should be allowed.
   This prevents complex sets of likely_spilled hard regs before reload.  */

bool
ix86_hardreg_mov_ok (rtx dst, rtx src)
{
  /* Avoid complex sets of likely_spilled hard registers before reload.  */
  if (REG_P (dst) && HARD_REGISTER_P (dst)
      && !REG_P (src) && !MEM_P (src)
      && !(VECTOR_MODE_P (GET_MODE (dst))
	   ? standard_sse_constant_p (src, GET_MODE (dst))
	   : x86_64_immediate_operand (src, GET_MODE (dst)))
      && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
      && !reload_completed)
    return false;
  return true;
}
/* If we are copying between registers from different register sets
   (e.g. FP and integer), we may need a memory location.

   The function can't work reliably when one of the CLASSES is a class
   containing registers from multiple sets.  We avoid this by never combining
   different sets in a single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST,
   so do not enforce these sanity checks.

   To optimize register_move_cost performance, define inline variant.  */

static inline bool
inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
				reg_class_t class2, int strict)
{
  if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
    return false;

  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
      || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
      || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
    {
      gcc_assert (!strict || lra_in_progress);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  /* Between mask and general, we have moves no larger than word size.  */
  if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
    {
      if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
	  || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;

      return false;
    }

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
	return true;

      int msize = GET_MODE_SIZE (mode);

      /* Between SSE and general, we have moves no larger than word size.  */
      if (msize > UNITS_PER_WORD)
	return true;

      /* In addition to SImode moves, HImode moves are supported for SSE2 and
	 above.  Use vmovw with AVX512FP16, or pinsrw/pextrw without
	 AVX512FP16.  */
      int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);

      if (msize < minsize)
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
	  || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
	return true;
    }

  return false;
}
/* Implement TARGET_SECONDARY_MEMORY_NEEDED.  */

static bool
ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
			      reg_class_t class2)
{
  return inline_secondary_memory_needed (mode, class1, class2, true);
}
/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.

   get_secondary_mem widens integral modes to BITS_PER_WORD.
   There is no need to emit full 64 bit move on 64 bit targets
   for integral modes that can be moved using 32 bit move.  */

static machine_mode
ix86_secondary_memory_needed_mode (machine_mode mode)
{
  if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
    return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();

  return mode;
}
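
/* E.g. a QImode value spilled between register sets is accessed in the
   secondary-memory slot as SImode, so the slot is read and written with
   32-bit moves; modes of 32 bits and wider keep their own size.  */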
/* Implement the TARGET_CLASS_MAX_NREGS hook.

   On the 80386, this is the size of MODE in words,
   except in the FP regs, where a single reg is always enough.  */

static unsigned char
ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
{
  if (MAYBE_INTEGER_CLASS_P (rclass))
    {
      if (mode == XFmode)
	return (TARGET_64BIT ? 2 : 3);
      else if (mode == XCmode)
	return (TARGET_64BIT ? 4 : 6);
      else
	return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
    }
  else
    {
      if (COMPLEX_MODE_P (mode))
	return 2;
      else
	return 1;
    }
}
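
/* Illustrative values: XFmode in an integer class needs 3 registers on
   32-bit targets (12 bytes / 4) and 2 on 64-bit targets (16 bytes / 8,
   including padding), matching the explicit constants above; FP classes
   always report 1 for non-complex modes.  */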
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */

static bool
ix86_can_change_mode_class (machine_mode from, machine_mode to,
			    reg_class_t regclass)
{
  if (from == to)
    return true;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return false;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
	 disallow a change to these modes, reload will assume it's ok to
	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
	 the vec_dupv4hi pattern.
	 NB: SSE2 can load 16bit data to sse register via pinsrw.  */
      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
      if (GET_MODE_SIZE (from) < mov_size
	  || GET_MODE_SIZE (to) < mov_size)
	return false;
    }

  return true;
}
/* Return index of MODE in the sse load/store tables.  */

static inline int
sse_store_index (machine_mode mode)
{
  /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
     costs to processor_costs, which requires changes to all entries in
     processor cost table.  */
  if (mode == E_HFmode)
    mode = E_SFmode;

  switch (GET_MODE_SIZE (mode))
    {
      case 4:
	return 0;
      case 8:
	return 1;
      case 16:
	return 2;
      case 32:
	return 3;
      case 64:
	return 4;
      default:
	return -1;
    }
}
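
/* The index returned above selects the sse_load/sse_store cost-table
   slot by size: 0 for 4 bytes, 1 for 8, 2 for 16, 3 for 32 and 4 for
   64, with -1 signalling "no entry" so callers can fall back to an
   arbitrary high cost.  */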
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   This function is used extensively by register_move_cost that is used to
   build tables at startup.  Make it inline in this case.
   When IN is 2, return maximum of in and out move cost.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

static inline int
inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
{
  int cost;

  if (FLOAT_CLASS_P (regclass))
    {
      int index;
      switch (mode)
	{
	  case E_SFmode:
	    index = 0;
	    break;
	  case E_DFmode:
	    index = 1;
	    break;
	  case E_XFmode:
	    index = 2;
	    break;
	  default:
	    return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->hard_register.fp_load[index],
		    ix86_cost->hard_register.fp_store[index]);
      return in ? ix86_cost->hard_register.fp_load[index]
		: ix86_cost->hard_register.fp_store[index];
    }
  if (SSE_CLASS_P (regclass))
    {
      int index = sse_store_index (mode);
      if (index == -1)
	return 100;
      if (in == 2)
	return MAX (ix86_cost->hard_register.sse_load[index],
		    ix86_cost->hard_register.sse_store[index]);
      return in ? ix86_cost->hard_register.sse_load[index]
		: ix86_cost->hard_register.sse_store[index];
    }
  if (MASK_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 1:
	  index = 0;
	  break;
	case 2:
	  index = 1;
	  break;
	/* DImode loads and stores assumed to cost the same as SImode.  */
	case 4:
	case 8:
	  index = 2;
	  break;
	default:
	  return 100;
	}

      if (in == 2)
	return MAX (ix86_cost->hard_register.mask_load[index],
		    ix86_cost->hard_register.mask_store[index]);
      return in ? ix86_cost->hard_register.mask_load[2]
		: ix86_cost->hard_register.mask_store[2];
    }
  if (MMX_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	  case 4:
	    index = 0;
	    break;
	  case 8:
	    index = 1;
	    break;
	  default:
	    return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->hard_register.mmx_load[index],
		    ix86_cost->hard_register.mmx_store[index]);
      return in ? ix86_cost->hard_register.mmx_load[index]
		: ix86_cost->hard_register.mmx_store[index];
    }
  switch (GET_MODE_SIZE (mode))
    {
      case 1:
	if (Q_CLASS_P (regclass) || TARGET_64BIT)
	  {
	    if (!in)
	      return ix86_cost->hard_register.int_store[0];
	    if (TARGET_PARTIAL_REG_DEPENDENCY
		&& optimize_function_for_speed_p (cfun))
	      cost = ix86_cost->hard_register.movzbl_load;
	    else
	      cost = ix86_cost->hard_register.int_load[0];
	    if (in == 2)
	      return MAX (cost, ix86_cost->hard_register.int_store[0]);
	    return cost;
	  }
	else
	  {
	    if (in == 2)
	      return MAX (ix86_cost->hard_register.movzbl_load,
			  ix86_cost->hard_register.int_store[0] + 4);
	    if (in)
	      return ix86_cost->hard_register.movzbl_load;
	    else
	      return ix86_cost->hard_register.int_store[0] + 4;
	  }

      case 2:
	{
	  int sse_cost;
	  if (in == 2)
	    cost = MAX (ix86_cost->hard_register.int_load[1],
			ix86_cost->hard_register.int_store[1]);
	  else
	    cost = in ? ix86_cost->hard_register.int_load[1]
		      : ix86_cost->hard_register.int_store[1];

	  if (mode == E_HFmode)
	    {
	      /* Prefer SSE over GPR for HFmode.  */
	      int index = sse_store_index (mode);
	      if (in == 2)
		sse_cost = MAX (ix86_cost->hard_register.sse_load[index],
				ix86_cost->hard_register.sse_store[index]);
	      else
		sse_cost = (in
			    ? ix86_cost->hard_register.sse_load[index]
			    : ix86_cost->hard_register.sse_store[index]);
	      if (sse_cost >= cost)
		cost = sse_cost + 1;
	    }
	  return cost;
	}

      default:
	if (in == 2)
	  cost = MAX (ix86_cost->hard_register.int_load[2],
		      ix86_cost->hard_register.int_store[2]);
	else if (in)
	  cost = ix86_cost->hard_register.int_load[2];
	else
	  cost = ix86_cost->hard_register.int_store[2];
	/* Multiply with the number of GPR moves needed.  */
	return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
    }
}
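
/* Worked example (illustrative): DImode in GENERAL_REGS on -m32 falls
   into the default arm above and the int_load[2]/int_store[2] cost is
   scaled by CEIL (8, 4) == 2, modelling the pair of 32-bit moves that
   is actually emitted.  */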
static int
ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
{
  return inline_memory_move_cost (mode, (enum reg_class) regclass,
				  in ? 1 : 0);
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
			 reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (inline_secondary_memory_needed (mode, class1, class2, false))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
	  && TARGET_MEMORY_MISMATCH_STALL
	  && targetm.class_max_nregs (class1, mode)
	     > targetm.class_max_nregs (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between MMX and non-MMX units require secondary memory.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    gcc_unreachable ();

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return (SSE_CLASS_P (class1)
	    ? ix86_cost->hard_register.sse_to_integer
	    : ix86_cost->hard_register.integer_to_sse);

  /* Moves between mask register and GPR.  */
  if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
    return (MASK_CLASS_P (class1)
	    ? ix86_cost->hard_register.mask_to_integer
	    : ix86_cost->hard_register.integer_to_mask);

  /* Moving between mask registers.  */
  if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
    return ix86_cost->hard_register.mask_move;

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->hard_register.fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    {
      if (GET_MODE_BITSIZE (mode) <= 128)
	return ix86_cost->hard_register.xmm_move;
      if (GET_MODE_BITSIZE (mode) <= 256)
	return ix86_cost->hard_register.ymm_move;
      return ix86_cost->hard_register.zmm_move;
    }
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->hard_register.mmx_move;

  return 2;
}
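
/* Illustrative: an SImode copy between SSE_REGS and GENERAL_REGS is
   priced directly via sse_to_integer/integer_to_sse, while a V4SFmode
   copy between the same classes needs secondary memory and is costed
   above as a store plus a load, at least as high as the symmetric
   MEMORY_MOVE_COST.  */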
/* Implement TARGET_HARD_REGNO_NREGS.  This is ordinarily the length in
   words of a value of mode MODE but can be less for certain modes in
   special long registers.

   Actually there are no two word move instructions for consecutive
   registers.  And only registers 0-3 may have mov byte instructions
   applied to them.  */

static unsigned int
ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  if (GENERAL_REGNO_P (regno))
    {
      if (mode == XFmode)
	return TARGET_64BIT ? 2 : 3;
      if (mode == XCmode)
	return TARGET_64BIT ? 4 : 6;
      return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
    }
  if (COMPLEX_MODE_P (mode))
    return 2;
  /* Register pair for mask registers.  */
  if (mode == P2QImode || mode == P2HImode)
    return 2;
  if (mode == V64SFmode || mode == V64SImode)
    return 4;
  return 1;
}
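
/* Illustrative: DImode occupies CEIL (8, 4) == 2 general registers on
   -m32 but a single one on -m64, while the mask-pair modes P2QImode
   and P2HImode always report 2.  */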
/* Implement REGMODE_NATURAL_SIZE(MODE).  */

unsigned int
ix86_regmode_natural_size (machine_mode mode)
{
  if (mode == P2HImode || mode == P2QImode)
    return GET_MODE_SIZE (mode) / 2;
  return UNITS_PER_WORD;
}
/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  /* Only the flags register can hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM)
    return false;
  if (STACK_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (MASK_REGNO_P (regno))
    {
      /* Register pair only starts at even register number.  */
      if ((mode == P2QImode || mode == P2HImode))
	return MASK_PAIR_REGNO_P (regno);

      return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
	      || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
    }

  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;

  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  */

      /* For AVX-512 we allow, regardless of regno:
	  - any of 512-bit wide vector mode
	  - any scalar mode.  */
      if (TARGET_AVX512F
	  && ((VALID_AVX512F_REG_OR_XI_MODE (mode) && TARGET_EVEX512)
	      || VALID_AVX512F_SCALAR_MODE (mode)))
	return true;

      /* For AVX-5124FMAPS or AVX-5124VNNIW
	 allow V64SF and V64SI modes for special regnos.  */
      if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
	  && (mode == V64SFmode || mode == V64SImode)
	  && MOD4_SSE_REGNO_P (regno))
	return true;

      /* TODO check for QI/HI scalars.  */
      /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
      if (TARGET_AVX512VL
	  && (VALID_AVX256_REG_OR_OI_MODE (mode)
	      || VALID_AVX512VL_128_REG_MODE (mode)))
	return true;

      /* xmm16-xmm31 are only available for AVX-512.  */
      if (EXT_REX_SSE_REGNO_P (regno))
	return false;

      /* Use pinsrw/pextrw to mov 16-bit data from/to sse to/from integer.  */
      if (TARGET_SSE2 && mode == HImode)
	return true;

      /* OImode and AVX modes are available only when AVX is enabled.  */
      return ((TARGET_AVX
	       && VALID_AVX256_REG_OR_OI_MODE (mode))
	      || VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (ANY_QI_REGNO_P (regno))
	return true;
      if (!TARGET_PARTIAL_REG_STALL)
	return true;
      /* LRA checks if the hard register is OK for the given mode.
	 QImode values can live in non-QI regs, so we allow all
	 registers here.  */
      if (lra_in_progress)
	return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode)
	   || VALID_FP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}
/* Implement TARGET_INSN_CALLEE_ABI.  */

const predefined_function_abi &
ix86_insn_callee_abi (const rtx_insn *insn)
{
  unsigned int abi_id = 0;
  rtx pat = PATTERN (insn);
  if (vzeroupper_pattern (pat, VOIDmode))
    abi_id = ABI_VZEROUPPER;

  return function_abis[abi_id];
}
/* Initialize function_abis with corresponding abi_id,
   currently only handle vzeroupper.  */

void
ix86_initialize_callee_abi (unsigned int abi_id)
{
  gcc_assert (abi_id == ABI_VZEROUPPER);
  predefined_function_abi &vzeroupper_abi = function_abis[abi_id];
  if (!vzeroupper_abi.initialized_p ())
    {
      HARD_REG_SET full_reg_clobbers;
      CLEAR_HARD_REG_SET (full_reg_clobbers);
      vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers);
    }
}
void
ix86_expand_avx_vzeroupper (void)
{
  /* Initialize vzeroupper_abi here.  */
  ix86_initialize_callee_abi (ABI_VZEROUPPER);
  rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ());
  /* Return false for non-local goto in can_nonlocal_goto.  */
  make_reg_eh_region_note (insn, 0, INT_MIN);
  /* Flag used for call_insn indicates it's a fake call.  */
  RTX_FLAG (insn, used) = 1;
}
/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The only ABI that
   saves SSE registers across calls is Win64 (thus no need to check the
   current ABI here), and with AVX enabled Win64 only guarantees that
   the low 16 bytes are saved.  */

static bool
ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno,
				     machine_mode mode)
{
  /* Special ABI for vzeroupper which only clobbers the higher part of
     sse regs.  */
  if (abi_id == ABI_VZEROUPPER)
    return (GET_MODE_SIZE (mode) > 16
	    && ((TARGET_64BIT && REX_SSE_REGNO_P (regno))
		|| LEGACY_SSE_REGNO_P (regno)));

  return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
}
/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (machine_mode mode)
{
  switch (mode)
    {
    case E_HImode:
    case E_SImode:
      return true;

    case E_QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case E_DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}
/* Implement TARGET_MODES_TIEABLE_P.

   Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

static bool
ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 64
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 64
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
  if (GET_MODE_SIZE (mode2) == 32
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 32
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  /* SCmode and DImode can be tied.  */
  if ((mode1 == E_SCmode && mode2 == E_DImode)
      || (mode1 == E_DImode && mode2 == E_SCmode))
    return TARGET_64BIT;

  /* [SD]Cmode and V2[SD]Fmode modes can be tied.  */
  if ((mode1 == E_SCmode && mode2 == E_V2SFmode)
      || (mode1 == E_V2SFmode && mode2 == E_SCmode)
      || (mode1 == E_DCmode && mode2 == E_V2DFmode)
      || (mode1 == E_V2DFmode && mode2 == E_DCmode))
    return true;

  return false;
}
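
/* Illustrative: SFmode ties with DFmode, since every register set that
   can hold DFmode (fp stack, general, SSE) can hold SFmode too; the
   query for DImode against SFmode fails, because integer classes cannot
   hold SFmode.  */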
/* Return the cost of moving between two registers of mode MODE.  */

static int
ix86_set_reg_reg_cost (machine_mode mode)
{
  unsigned int units = UNITS_PER_WORD;

  switch (GET_MODE_CLASS (mode))
    {
    default:
      break;

    case MODE_CC:
      units = GET_MODE_SIZE (CCmode);
      break;

    case MODE_FLOAT:
      if ((TARGET_SSE && mode == TFmode)
	  || (TARGET_80387 && mode == XFmode)
	  || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
	  || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
	units = GET_MODE_SIZE (mode);
      break;

    case MODE_COMPLEX_FLOAT:
      if ((TARGET_SSE && mode == TCmode)
	  || (TARGET_80387 && mode == XCmode)
	  || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
	  || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
	units = GET_MODE_SIZE (mode);
      break;

    case MODE_VECTOR_INT:
    case MODE_VECTOR_FLOAT:
      if ((TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
	  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
	  || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
	      && VALID_MMX_REG_MODE (mode)))
	units = GET_MODE_SIZE (mode);
      break;
    }

  /* Return the cost of moving between two registers of mode MODE,
     assuming that the move will be in pieces of at most UNITS bytes.  */
  return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
}
/* Return cost of vector operation in MODE given that scalar version has
   COST.  */

static int
ix86_vec_cost (machine_mode mode, int cost)
{
  if (!VECTOR_MODE_P (mode))
    return cost;

  if (GET_MODE_BITSIZE (mode) == 128
      && TARGET_SSE_SPLIT_REGS)
    return cost * GET_MODE_BITSIZE (mode) / 64;
  else if (GET_MODE_BITSIZE (mode) > 128
	   && TARGET_AVX256_SPLIT_REGS)
    return cost * GET_MODE_BITSIZE (mode) / 128;
  else if (GET_MODE_BITSIZE (mode) > 256
	   && TARGET_AVX512_SPLIT_REGS)
    return cost * GET_MODE_BITSIZE (mode) / 256;
  return cost;
}
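
/* Worked example: with TARGET_AVX256_SPLIT_REGS set (tunings that
   execute 256-bit operations as two 128-bit halves), a V8SFmode
   operation with scalar cost C is billed as C * 256 / 128 == 2 * C.  */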
/* Return cost of vec_widen_<s>mult_hi/lo_<mode>;
   vec_widen_<s>mult_hi/lo_<mode> is only available for VI124_AVX2.  */
static int
ix86_widen_mult_cost (const struct processor_costs *cost,
		      enum machine_mode mode, bool uns_p)
{
  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
  int extra_cost = 0;
  int basic_cost = 0;
  switch (mode)
    {
    case V8HImode:
    case V16HImode:
      if (!uns_p || mode == V16HImode)
	extra_cost = cost->sse_op * 2;
      basic_cost = cost->mulss * 2 + cost->sse_op * 4;
      break;
    case V4SImode:
    case V8SImode:
      /* pmulhw/pmullw can be used.  */
      basic_cost = cost->mulss * 2 + cost->sse_op * 2;
      break;
    case V2DImode:
      /* pmuludq under sse2, pmuldq under sse4.1, for sign_extend,
	 require extra 4 mul, 4 add, 4 cmp and 2 shift.  */
      if (!TARGET_SSE4_1 && !uns_p)
	extra_cost = (cost->mulss + cost->addss + cost->sse_op) * 4
		      + cost->sse_op * 2;
      /* Fallthru.  */
    case V4DImode:
      basic_cost = cost->mulss * 2 + cost->sse_op * 4;
      break;
    default:
      /* Not implemented.  */
      return 100;
    }
  return ix86_vec_cost (mode, basic_cost + extra_cost);
}
/* Return cost of multiplication in MODE.  */

static int
ix86_multiplication_cost (const struct processor_costs *cost,
			  enum machine_mode mode)
{
  machine_mode inner_mode = mode;
  if (VECTOR_MODE_P (mode))
    inner_mode = GET_MODE_INNER (mode);

  if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
    return inner_mode == DFmode ? cost->mulsd : cost->mulss;
  else if (X87_FLOAT_MODE_P (mode))
    return cost->fmul;
  else if (FLOAT_MODE_P (mode))
    return ix86_vec_cost (mode,
			  inner_mode == DFmode ? cost->mulsd : cost->mulss);
  else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    {
      int nmults, nops;
      /* Cost of reading the memory.  */
      int extra;

      switch (mode)
	{
	case V4QImode:
	case V8QImode:
	  /* Partial V*QImode is emulated with 4-6 insns.  */
	  nmults = 1;
	  nops = 3;
	  extra = 0;

	  if (TARGET_AVX512BW && TARGET_AVX512VL)
	    ;
	  else if (TARGET_AVX2)
	    nops += 2;
	  else if (TARGET_XOP)
	    extra += cost->sse_load[2];
	  else
	    {
	      nops += 1;
	      extra += cost->sse_load[2];
	    }
	  goto do_qimode;

	case V16QImode:
	  /* V*QImode is emulated with 4-11 insns.  */
	  nmults = 1;
	  nops = 3;
	  extra = 0;

	  if (TARGET_AVX2 && !TARGET_PREFER_AVX128)
	    {
	      if (!(TARGET_AVX512BW && TARGET_AVX512VL))
		nops += 3;
	    }
	  else if (TARGET_XOP)
	    {
	      nmults += 1;
	      nops += 2;
	      extra += cost->sse_load[2];
	    }
	  else
	    {
	      nmults += 1;
	      nops += 4;
	      extra += cost->sse_load[2];
	    }
	  goto do_qimode;

	case V32QImode:
	  nmults = 1;
	  nops = 3;
	  extra = 0;

	  if (!TARGET_AVX512BW || TARGET_PREFER_AVX256)
	    {
	      nmults += 1;
	      nops += 2;
	      extra += cost->sse_load[3] * 2;
	    }
	  goto do_qimode;

	case V64QImode:
	  nmults = 2;
	  nops = 9;
	  extra = cost->sse_load[3] * 2 + cost->sse_load[4] * 2;

	do_qimode:
	  return ix86_vec_cost (mode, cost->mulss * nmults
				+ cost->sse_op * nops) + extra;

	case V4SImode:
	  /* pmulld is used in this case.  No emulation is needed.  */
	  if (TARGET_SSE4_1)
	    goto do_native;
	  /* V4SImode is emulated with 7 insns.  */
	  else
	    return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);

	case V2DImode:
	case V4DImode:
	  /* vpmullq is used in this case.  No emulation is needed.  */
	  if (TARGET_AVX512DQ && TARGET_AVX512VL)
	    goto do_native;
	  /* V*DImode is emulated with 6-8 insns.  */
	  else if (TARGET_XOP && mode == V2DImode)
	    return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 4);
	  /* FALLTHRU */
	case V8DImode:
	  /* vpmullq is used in this case.  No emulation is needed.  */
	  if (TARGET_AVX512DQ && mode == V8DImode)
	    goto do_native;
	  else
	    return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);

	default:
	do_native:
	  return ix86_vec_cost (mode, cost->mulss);
	}
    }
  else
    return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
}
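
/* Worked example (based on the emulation counts above): V16QImode
   multiplication without AVX2 or XOP uses nmults == 2 and nops == 7,
   i.e. roughly two pmullw-class multiplies plus seven shuffle/pack
   style ops, plus one constant-pool load accounted in EXTRA.  */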
/* Return cost of division in MODE.  */

static int
ix86_division_cost (const struct processor_costs *cost,
		    enum machine_mode mode)
{
  machine_mode inner_mode = mode;
  if (VECTOR_MODE_P (mode))
    inner_mode = GET_MODE_INNER (mode);

  if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
    return inner_mode == DFmode ? cost->divsd : cost->divss;
  else if (X87_FLOAT_MODE_P (mode))
    return cost->fdiv;
  else if (FLOAT_MODE_P (mode))
    return ix86_vec_cost (mode,
			  inner_mode == DFmode ? cost->divsd : cost->divss);
  else
    return cost->divide[MODE_INDEX (mode)];
}
/* Return cost of shift in MODE.
   If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
   AND_IN_OP1 specifies whether op1 is the result of an AND, and
   SHIFT_AND_TRUNCATE whether op1 is a result of a subreg.

   SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored.  */

static int
ix86_shift_rotate_cost (const struct processor_costs *cost,
			enum rtx_code code,
			enum machine_mode mode, bool constant_op1,
			HOST_WIDE_INT op1_val,
			bool and_in_op1,
			bool shift_and_truncate,
			bool *skip_op0, bool *skip_op1)
{
  if (skip_op0)
    *skip_op0 = *skip_op1 = false;

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    {
      int count;
      /* Cost of reading the memory.  */
      int extra;

      switch (mode)
	{
	case V4QImode:
	case V8QImode:
	  if (TARGET_AVX2)
	    /* Use vpbroadcast.  */
	    extra = cost->sse_op;
	  else
	    extra = cost->sse_load[2];

	  if (constant_op1)
	    {
	      if (code == ASHIFTRT)
		{
		  count = 4;
		  extra *= 2;
		}
	      else
		count = 2;
	    }
	  else if (TARGET_AVX512BW && TARGET_AVX512VL)
	    return ix86_vec_cost (mode, cost->sse_op * 4);
	  else if (TARGET_SSE4_1)
	    count = 5;
	  else if (code == ASHIFTRT)
	    count = 6;
	  else
	    count = 5;
	  return ix86_vec_cost (mode, cost->sse_op * count) + extra;

	case V16QImode:
	  if (TARGET_XOP)
	    {
	      /* For XOP we use vpshab, which requires a broadcast of the
		 value to the variable shift insn.  For constants this
		 means a V16Q const in mem; even when we can perform the
		 shift with one insn set the cost to prefer paddb.  */
	      if (constant_op1)
		{
		  extra = cost->sse_load[2];
		  return ix86_vec_cost (mode, cost->sse_op) + extra;
		}
	      else
		{
		  count = (code == ASHIFT) ? 3 : 4;
		  return ix86_vec_cost (mode, cost->sse_op * count);
		}
	    }
	  /* FALLTHRU */
	case V32QImode:
	  if (TARGET_AVX2)
	    /* Use vpbroadcast.  */
	    extra = cost->sse_op;
	  else
	    extra = (mode == V16QImode) ? cost->sse_load[2]
					: cost->sse_load[3];

	  if (constant_op1)
	    {
	      if (code == ASHIFTRT)
		{
		  count = 4;
		  extra *= 2;
		}
	      else
		count = 2;
	    }
	  else if (TARGET_AVX512BW
		   && ((mode == V32QImode && !TARGET_PREFER_AVX256)
		       || (mode == V16QImode && TARGET_AVX512VL
			   && !TARGET_PREFER_AVX128)))
	    return ix86_vec_cost (mode, cost->sse_op * 4);
	  else if (TARGET_AVX2
		   && mode == V16QImode && !TARGET_PREFER_AVX128)
	    count = 6;
	  else if (TARGET_SSE4_1)
	    count = 9;
	  else if (code == ASHIFTRT)
	    count = 10;
	  else
	    count = 9;
	  return ix86_vec_cost (mode, cost->sse_op * count) + extra;

	case V2DImode:
	case V4DImode:
	  /* V*DImode arithmetic right shift is emulated.  */
	  if (code == ASHIFTRT && !TARGET_AVX512VL)
	    {
	      if (constant_op1)
		{
		  if (op1_val == 63)
		    count = TARGET_SSE4_2 ? 1 : 2;
		  else if (TARGET_XOP)
		    count = 2;
		  else if (TARGET_SSE4_1)
		    count = 3;
		  else
		    count = 4;
		}
	      else if (TARGET_XOP)
		count = 3;
	      else if (TARGET_SSE4_2)
		count = 4;
	      else
		count = 5;

	      return ix86_vec_cost (mode, cost->sse_op * count);
	    }
	  /* FALLTHRU */
	default:
	  return ix86_vec_cost (mode, cost->sse_op);
	}
    }

  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    {
      if (constant_op1)
	{
	  if (op1_val > 32)
	    return cost->shift_const + COSTS_N_INSNS (2);
	  else
	    return cost->shift_const * 2;
	}
      else
	{
	  if (and_in_op1)
	    return cost->shift_var * 2;
	  else
	    return cost->shift_var * 6 + COSTS_N_INSNS (2);
	}
    }
  else
    {
      if (constant_op1)
	return cost->shift_const;
      else if (shift_and_truncate)
	{
	  if (skip_op0)
	    *skip_op0 = *skip_op1 = true;
	  /* Return the cost after shift-and truncation.  */
	  return cost->shift_var;
	}
      else
	return cost->shift_var;
    }
}
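
/* Illustrative: a double-word shift (e.g. DImode on -m32) by a
   constant is priced as two constant shifts, or as shift_const plus
   two extra insns once the count exceeds 32 and one half can simply be
   moved and cleared.  */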
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
		int *total, bool speed)
{
  rtx mask;
  enum rtx_code code = GET_CODE (x);
  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
  const struct processor_costs *cost
    = speed ? ix86_tune_cost : &ix86_size_cost;
  int src_cost;

  switch (code)
    {
    case SET:
      if (register_operand (SET_DEST (x), VOIDmode)
	  && register_operand (SET_SRC (x), VOIDmode))
	{
	  *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
	  return true;
	}

      if (register_operand (SET_SRC (x), VOIDmode))
	/* Avoid potentially incorrect high cost from rtx_costs
	   for non-tieable SUBREGs.  */
	src_cost = 0;
      else
	{
	  src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);

	  if (CONSTANT_P (SET_SRC (x)))
	    /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
	       a small value, possibly zero for cheap constants.  */
	    src_cost += COSTS_N_INSNS (1);
	}

      *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
      return true;

    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (x86_64_immediate_operand (x, VOIDmode))
	*total = 0;
      else
	*total = 1;
      return true;

    case CONST_DOUBLE:
      if (IS_STACK_MODE (mode))
	switch (standard_80387_constant_p (x))
	  {
	  case -1:
	  case 0:
	    break;
	  case 1: /* 0.0 */
	    *total = 1;
	    return true;
	  default: /* Other constants */
	    *total = 2;
	    return true;
	  }
      /* FALLTHRU */

    case CONST_VECTOR:
      switch (standard_sse_constant_p (x, mode))
	{
	case 0:
	  break;
	case 1: /* 0: xor eliminates false dependency */
	  *total = 0;
	  return true;
	default: /* -1: cmp contains false dependency */
	  *total = 1;
	  return true;
	}
      /* FALLTHRU */

    case CONST_WIDE_INT:
      /* Fall back to (MEM (SYMBOL_REF)), since that's where
	 it'll probably end up.  Add a penalty for size.  */
      *total = (COSTS_N_INSNS (1)
		+ (!TARGET_64BIT && flag_pic)
		+ (GET_MODE_SIZE (mode) <= 4
		   ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
      return true;
21610 it as cheap as possible. */
21611 if (TARGET_64BIT
&& mode
== DImode
21612 && GET_MODE (XEXP (x
, 0)) == SImode
)
21614 else if (TARGET_ZERO_EXTEND_WITH_AND
)
21615 *total
= cost
->add
;
21617 *total
= cost
->movzx
;
21621 *total
= cost
->movsx
;
21625 if (SCALAR_INT_MODE_P (mode
)
21626 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
21627 && CONST_INT_P (XEXP (x
, 1)))
21629 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
21632 *total
= cost
->add
;
21635 if ((value
== 2 || value
== 3)
21636 && cost
->lea
<= cost
->shift_const
)
21638 *total
= cost
->lea
;
21648 bool skip_op0
, skip_op1
;
21649 *total
= ix86_shift_rotate_cost (cost
, code
, mode
,
21650 CONSTANT_P (XEXP (x
, 1)),
21651 CONST_INT_P (XEXP (x
, 1))
21652 ? INTVAL (XEXP (x
, 1)) : -1,
21653 GET_CODE (XEXP (x
, 1)) == AND
,
21654 SUBREG_P (XEXP (x
, 1))
21655 && GET_CODE (XEXP (XEXP (x
, 1),
21657 &skip_op0
, &skip_op1
);
21658 if (skip_op0
|| skip_op1
)
21661 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
21663 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed
);
    case FMA:
      {
	rtx sub;

	gcc_assert (FLOAT_MODE_P (mode));
	gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);

	*total = ix86_vec_cost (mode,
				GET_MODE_INNER (mode) == SFmode
				? cost->fmass : cost->fmasd);
	*total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);

	/* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
	sub = XEXP (x, 0);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, mode, FMA, 0, speed);

	sub = XEXP (x, 2);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, mode, FMA, 2, speed);
	return true;
      }

    case MULT:
      if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  int nbits;
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	      for (nbits = 0; value != 0; value &= value - 1)
		nbits++;
	    }
	  else
	    /* This is arbitrary.  */
	    nbits = 7;

	  /* Compute costs correctly for widening multiplication.  */
	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
		 == GET_MODE_SIZE (mode))
	    {
	      int is_mulwiden = 0;
	      machine_mode inner_mode = GET_MODE (op0);

	      if (GET_CODE (op0) == GET_CODE (op1))
		is_mulwiden = 1, op1 = XEXP (op1, 0);
	      else if (CONST_INT_P (op1))
		{
		  if (GET_CODE (op0) == SIGN_EXTEND)
		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
				  == INTVAL (op1);
		  else
		    is_mulwiden
		      = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
		}

	      if (is_mulwiden)
		op0 = XEXP (op0, 0), mode = GET_MODE (op0);
	    }

	  int mult_init;
	  // Double word multiplication requires 3 mults and 2 adds.
	  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	    {
	      mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)]
			  + 2 * cost->add;
	      nbits *= 3;
	    }
	  else
	    mult_init = cost->mult_init[MODE_INDEX (mode)];

	  *total = (mult_init
		    + nbits * cost->mult_bit
		    + rtx_cost (op0, mode, outer_code, opno, speed)
		    + rtx_cost (op1, mode, outer_code, opno, speed));

	  return true;
	}

      *total = ix86_multiplication_cost (cost, mode);
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      *total = ix86_division_cost (cost, mode);
      break;
    case PLUS:
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
	{
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
	      && CONSTANT_P (XEXP (x, 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), mode,
				      outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == MULT
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), mode,
				      outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
	    {
	      rtx op = XEXP (XEXP (x, 0), 0);

	      /* Add with carry, ignore the cost of adding a carry flag.  */
	      if (ix86_carry_flag_operator (op, mode)
		  || ix86_carry_flag_unset_operator (op, mode))
		*total = cost->add;
	      else
		{
		  *total = cost->lea;
		  *total += rtx_cost (op, mode,
				      outer_code, opno, speed);
		}

	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
				  outer_code, opno, speed);
	      *total += rtx_cost (XEXP (x, 1), mode,
				  outer_code, opno, speed);
	      return true;
	    }
	}
      /* FALLTHRU */

    case MINUS:
      /* Subtract with borrow, ignore the cost of subtracting a carry flag.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	  && GET_CODE (XEXP (x, 0)) == MINUS
	  && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)
	      || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1),
						 mode)))
	{
	  *total = cost->add;
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
			      outer_code, opno, speed);
	  *total += rtx_cost (XEXP (x, 1), mode,
			      outer_code, opno, speed);
	  return true;
	}

      if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
	*total = cost->addss;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fadd;
      else if (FLOAT_MODE_P (mode))
	*total = ix86_vec_cost (mode, cost->addss);
      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	*total = ix86_vec_cost (mode, cost->sse_op);
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	*total = cost->add * 2;
      else
	*total = cost->add;
      break;
    case IOR:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || SSE_FLOAT_MODE_P (mode))
	{
	  /* (ior (not ...) ...) can be a single insn in AVX512.  */
	  if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
	      && ((TARGET_EVEX512
		   && GET_MODE_SIZE (mode) == 64)
		  || (TARGET_AVX512VL
		      && (GET_MODE_SIZE (mode) == 32
			  || GET_MODE_SIZE (mode) == 16))))
	    {
	      rtx right = GET_CODE (XEXP (x, 1)) != NOT
			  ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0);

	      *total = ix86_vec_cost (mode, cost->sse_op)
		       + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
				   outer_code, opno, speed)
		       + rtx_cost (right, mode, outer_code, opno, speed);
	    }
	  else
	    *total = ix86_vec_cost (mode, cost->sse_op);
	}
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	*total = cost->add * 2;
      else
	*total = cost->add;
      break;

    case XOR:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || SSE_FLOAT_MODE_P (mode))
	*total = ix86_vec_cost (mode, cost->sse_op);
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	*total = cost->add * 2;
      else
	*total = cost->add;
      break;

    case AND:
      if (address_no_seg_operand (x, mode))
	{
	  *total = cost->lea;
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	       || SSE_FLOAT_MODE_P (mode))
	{
	  /* pandn is a single instruction.  */
	  if (GET_CODE (XEXP (x, 0)) == NOT)
	    {
	      rtx right = XEXP (x, 1);

	      /* (and (not ...) (not ...)) can be a single insn in AVX512.  */
	      if (GET_CODE (right) == NOT && TARGET_AVX512F
		  && ((TARGET_EVEX512
		       && GET_MODE_SIZE (mode) == 64)
		      || (TARGET_AVX512VL
			  && (GET_MODE_SIZE (mode) == 32
			      || GET_MODE_SIZE (mode) == 16))))
		right = XEXP (right, 0);

	      *total = ix86_vec_cost (mode, cost->sse_op)
		       + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
				   outer_code, opno, speed)
		       + rtx_cost (right, mode, outer_code, opno, speed);
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 1)) == NOT)
	    {
	      *total = ix86_vec_cost (mode, cost->sse_op)
		       + rtx_cost (XEXP (x, 0), mode,
				   outer_code, opno, speed)
		       + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
				   outer_code, opno, speed);
	      return true;
	    }
	  *total = ix86_vec_cost (mode, cost->sse_op);
	}
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	{
	  if (TARGET_BMI && GET_CODE (XEXP (x, 0)) == NOT)
	    {
	      *total = cost->add * 2
		       + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
				   outer_code, opno, speed)
		       + rtx_cost (XEXP (x, 1), mode,
				   outer_code, opno, speed);
	      return true;
	    }
	  else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT)
	    {
	      *total = cost->add * 2
		       + rtx_cost (XEXP (x, 0), mode,
				   outer_code, opno, speed)
		       + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
				   outer_code, opno, speed);
	      return true;
	    }
	  *total = cost->add * 2;
	}
      else if (TARGET_BMI && GET_CODE (XEXP (x, 0)) == NOT)
	{
	  *total = cost->add
		   + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
			       outer_code, opno, speed)
		   + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
	  return true;
	}
      else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT)
	{
	  *total = cost->add
		   + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
		   + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
			       outer_code, opno, speed);
	  return true;
	}
      else
	*total = cost->add;
      break;
    case NOT:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  /* (not (xor ...)) can be a single insn in AVX512.  */
	  if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
	      && ((TARGET_EVEX512
		   && GET_MODE_SIZE (mode) == 64)
		  || (TARGET_AVX512VL
		      && (GET_MODE_SIZE (mode) == 32
			  || GET_MODE_SIZE (mode) == 16))))
	    {
	      *total = ix86_vec_cost (mode, cost->sse_op)
		       + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
				   outer_code, opno, speed)
		       + rtx_cost (XEXP (XEXP (x, 0), 1), mode,
				   outer_code, opno, speed);
	      return true;
	    }

	  // vnot is pxor -1.
	  *total = ix86_vec_cost (mode, cost->sse_op) + 1;
	}
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	*total = cost->add * 2;
      else
	*total = cost->add;
      break;

    case NEG:
      if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
	*total = cost->sse_op;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fchs;
      else if (FLOAT_MODE_P (mode))
	*total = ix86_vec_cost (mode, cost->sse_op);
      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	*total = ix86_vec_cost (mode, cost->sse_op);
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	*total = cost->add * 3;
      else
	*total = cost->add;
      break;
    case COMPARE:
      {
	rtx op0 = XEXP (x, 0);
	rtx op1 = XEXP (x, 1);
	rtx geu;

	if (GET_CODE (op0) == ZERO_EXTRACT
	    && XEXP (op0, 1) == const1_rtx
	    && CONST_INT_P (XEXP (op0, 2))
	    && op1 == const0_rtx)
	  {
	    /* This kind of construct is implemented using test[bwl].
	       Treat it as if we had an AND.  */
	    mode = GET_MODE (XEXP (op0, 0));
	    *total = (cost->add
		      + rtx_cost (XEXP (op0, 0), mode, outer_code,
				  opno, speed)
		      + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
	    return true;
	  }

	if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
	  {
	    /* This is an overflow detection, count it as a normal compare.  */
	    *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
	    return true;
	  }

	/* Match x
	   (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
			(ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))  */
	if (mode == CCCmode
	    && GET_CODE (op0) == NEG
	    && GET_CODE (geu = XEXP (op0, 0)) == GEU
	    && REG_P (XEXP (geu, 0))
	    && (GET_MODE (XEXP (geu, 0)) == CCCmode
		|| GET_MODE (XEXP (geu, 0)) == CCmode)
	    && REGNO (XEXP (geu, 0)) == FLAGS_REG
	    && XEXP (geu, 1) == const0_rtx
	    && GET_CODE (op1) == LTU
	    && REG_P (XEXP (op1, 0))
	    && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
	    && REGNO (XEXP (op1, 0)) == FLAGS_REG
	    && XEXP (op1, 1) == const0_rtx)
	  {
	    /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop.  */
	    *total = 0;
	    return true;
	  }

	/* Match x
	   (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
			(geu:QI (reg:CCC FLAGS_REG) (const_int 0)))  */
	if (mode == CCCmode
	    && GET_CODE (op0) == NEG
	    && GET_CODE (XEXP (op0, 0)) == LTU
	    && REG_P (XEXP (XEXP (op0, 0), 0))
	    && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
	    && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG
	    && XEXP (XEXP (op0, 0), 1) == const0_rtx
	    && GET_CODE (op1) == GEU
	    && REG_P (XEXP (op1, 0))
	    && GET_MODE (XEXP (op1, 0)) == CCCmode
	    && REGNO (XEXP (op1, 0)) == FLAGS_REG
	    && XEXP (op1, 1) == const0_rtx)
	  {
	    /* This is *x86_cmc.  */
	    if (!speed)
	      *total = COSTS_N_BYTES (1);
	    else if (TARGET_SLOW_STC)
	      *total = COSTS_N_INSNS (2);
	    else
	      *total = COSTS_N_INSNS (1);
	    return true;
	  }

	if (SCALAR_INT_MODE_P (GET_MODE (op0))
	    && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	  {
	    if (op1 == const0_rtx)
	      *total = cost->add
		       + rtx_cost (op0, GET_MODE (op0), outer_code, opno,
				   speed);
	    else
	      *total = 3 * cost->add
		       + rtx_cost (op0, GET_MODE (op0), outer_code, opno,
				   speed)
		       + rtx_cost (op1, GET_MODE (op0), outer_code, opno,
				   speed);
	    return true;
	  }

	/* The embedded comparison operand is completely free.  */
	if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
	  *total = 0;

	break;
      }
    case FLOAT_EXTEND:
      if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
	*total = 0;
      else
	*total = ix86_vec_cost (mode, cost->addss);
      break;

    case FLOAT_TRUNCATE:
      if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
	*total = cost->fadd;
      else
	*total = ix86_vec_cost (mode, cost->addss);
      break;

    case ABS:
      /* SSE requires memory load for the constant operand.  It may make
	 sense to account for this.  Of course the constant operand may or
	 may not be reused.  */
      if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
	*total = cost->sse_op;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fabs;
      else if (FLOAT_MODE_P (mode))
	*total = ix86_vec_cost (mode, cost->sse_op);
      break;

    case SQRT:
      if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
	*total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fsqrt;
      else if (FLOAT_MODE_P (mode))
	*total = ix86_vec_cost (mode,
				mode == SFmode ? cost->sqrtss : cost->sqrtsd);
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
	*total = 0;
      else if (XINT (x, 1) == UNSPEC_VTERNLOG)
	{
	  *total = cost->sse_op;
	  return true;
	}
      else if (XINT (x, 1) == UNSPEC_PTEST)
	{
	  *total = cost->sse_op;
	  rtx test_op0 = XVECEXP (x, 0, 0);
	  if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
	    break;
	  if (GET_CODE (test_op0) == AND)
	    {
	      rtx and_op0 = XEXP (test_op0, 0);
	      if (GET_CODE (and_op0) == NOT)
		and_op0 = XEXP (and_op0, 0);
	      *total += rtx_cost (and_op0, GET_MODE (and_op0),
				  AND, 0, speed)
			+ rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
				    AND, 1, speed);
	    }
	  else
	    *total = rtx_cost (test_op0, GET_MODE (test_op0),
			       UNSPEC, 0, speed);
	  return true;
	}
      break;

    case VEC_SELECT:
    case VEC_CONCAT:
    case VEC_DUPLICATE:
      /* ??? Assume all of these vector manipulation patterns are
	 recognizable.  In which case they all pretty much have the
	 same cost.  */
      *total = cost->sse_op;
      return true;

    case VEC_MERGE:
      mask = XEXP (x, 2);
      /* This is masked instruction, assume the same cost,
	 as nonmasked variant.  */
      if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
	*total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
      else
	*total = cost->sse_op;
      return true;

    case MEM:
      /* An insn that accesses memory is slightly more expensive
	 than one that does not.  */
      if (speed)
	*total += 1;
      break;

    case ZERO_EXTRACT:
      if (XEXP (x, 1) == const1_rtx
	  && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND
	  && GET_MODE (XEXP (x, 2)) == SImode
	  && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode)
	{
	  /* Ignore cost of zero extension and masking of last argument.  */
	  *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
	  *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
	  *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed);
	  return true;
	}
      break;

    case IF_THEN_ELSE:
      if (TARGET_XOP
	  && VECTOR_MODE_P (mode)
	  && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32))
	{
	  /* vpcmov.  */
	  *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6);
	  if (!REG_P (XEXP (x, 0)))
	    *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
	  if (!REG_P (XEXP (x, 1)))
	    *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
	  if (!REG_P (XEXP (x, 2)))
	    *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
	  return true;
	}
      else if (TARGET_CMOVE
	       && SCALAR_INT_MODE_P (mode)
	       && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
	{
	  /* cmov.  */
	  *total = COSTS_N_INSNS (1);
	  if (!REG_P (XEXP (x, 0)))
	    *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
	  if (!REG_P (XEXP (x, 1)))
	    *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
	  if (!REG_P (XEXP (x, 2)))
	    *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
	  return true;
	}
      break;

    default:
      break;
    }

  return false;
}
#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
  else if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name,
	       binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B. Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    /* 25-byte PIC stub using "CALL get_pc_thunk".  */
    switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);

  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
#endif /* TARGET_MACHO */
/* Order the registers for register allocator.  */

static void
x86_order_regs_for_local_alloc (void)
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
      reg_alloc_order[pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
      reg_alloc_order[pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order[pos++] = i;

  /* SSE registers.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    reg_alloc_order[pos++] = i;
  for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
    reg_alloc_order[pos++] = i;

  /* Extended REX SSE registers.  */
  for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
    reg_alloc_order[pos++] = i;

  /* Mask register.  */
  for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
    reg_alloc_order[pos++] = i;

  /* x87 registers.  */
  if (TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order[pos++] = i;

  for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
    reg_alloc_order[pos++] = i;

  /* Initialize the rest of array as we do not allocate some registers
     at all.  */
  while (pos < FIRST_PSEUDO_REGISTER)
    reg_alloc_order[pos++] = 0;
}
static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
  return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct",
				 TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      if (ix86_function_type_abi (type) == MS_ABI)
	parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
	parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (Pmode, parm_regs[aggr]);
    }

  nregs = ix86_function_regparm (type, function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	regno = aggr ? DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  regno = CX_REG;
	  if (aggr)
	    return gen_rtx_MEM (SImode,
				plus_constant (Pmode, stack_pointer_rtx, 4));
	}
      else
	{
	  regno = AX_REG;
	  if (aggr)
	    {
	      regno = DX_REG;
	      if (nregs == 1)
		return gen_rtx_MEM (SImode,
				    plus_constant (Pmode,
						   stack_pointer_rtx, 4));
	    }
	}
      return gen_rtx_REG (SImode, regno);
    }

  return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
					     aggr ? 8 : 4));
}
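
/* Illustrative: for the default 32-bit convention with regparm enabled,
   "this" arrives in %eax (%edx when a hidden aggregate-return pointer
   claims %eax); with no register parameters it is found on the stack,
   at 4(%esp), or at 8(%esp) past the aggregate-return pointer.  */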
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
			 const_tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !targetm.binds_local_p (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;
  unsigned int tmp_regno;
  rtx_insn *insn;
  int saved_flag_force_indirect_call = flag_force_indirect_call;

  if (TARGET_64BIT)
    tmp_regno = R10_REG;
  else
    {
      unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	tmp_regno = AX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	tmp_regno = DX_REG;
      else
	tmp_regno = CX_REG;

      if (flag_pic)
	flag_force_indirect_call = 0;
    }

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* CET is enabled, insert EB instruction.  */
  if ((flag_cf_protection & CF_BRANCH))
    emit_insn (gen_nop_endbr ());

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;
  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      tmp = gen_rtx_REG (Pmode, tmp_regno);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;

      tmp = gen_rtx_REG (Pmode, tmp_regno);

      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);
  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || TARGET_PECOFF)
	;
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr),
				UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_const_mem (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
	  fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
	  fnaddr = gen_const_mem (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    {
      if (sibcall_insn_operand (fnaddr, word_mode))
	{
	  fnaddr = XEXP (DECL_RTL (function), 0);
	  tmp = gen_rtx_MEM (QImode, fnaddr);
	  tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
	  tmp = emit_call_insn (tmp);
	  SIBLING_CALL_P (tmp) = 1;
	}
      else
	emit_jump_insn (gen_indirect_jump (fnaddr));
    }
  else
    {
      if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
	{
	  // CM_LARGE_PIC always uses pseudo PIC register which is
	  // uninitialized.  Since FUNCTION is local and calling it
	  // doesn't go through PLT, we use scratch register %r11 as
	  // PIC register and initialize it here.
	  pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
	  ix86_init_large_pic_reg (tmp_regno);
	  fnaddr = legitimize_pic_address (fnaddr,
					   gen_rtx_REG (Pmode, tmp_regno));
	}

      if (!sibcall_insn_operand (fnaddr, word_mode))
	{
	  tmp = gen_rtx_REG (word_mode, tmp_regno);
	  if (GET_MODE (fnaddr) != word_mode)
	    fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
	  emit_move_insn (tmp, fnaddr);
	  fnaddr = tmp;
	}

      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }

  /* Emit just enough of rest_of_compilation to get the insns emitted.  */
  insn = get_insns ();
  shorten_branches (insn);
  assemble_start_function (thunk_fndecl, fnname);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();
  assemble_end_function (thunk_fndecl, fnname);

  flag_force_indirect_call = saved_flag_force_indirect_call;
}
static void
x86_file_start (void)
{
  default_file_start ();
  if (TARGET_16BIT)
    fputs ("\t.code16gcc\n", asm_out_file);
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
int
x86_field_alignment (tree type, int computed)
{
  machine_mode mode;

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  if (TARGET_IAMCU)
    return iamcu_alignment (type, computed);
  type = strip_array_types (type);
  mode = TYPE_MODE (type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    {
      if (TYPE_ATOMIC (type) && computed > 32)
        {
          static bool warned;

          if (!warned && warn_psabi)
            {
              const char *url
                = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";

              warned = true;
              inform (input_location, "the alignment of %<_Atomic %T%> "
                      "fields changed in %{GCC 11.1%}",
                      TYPE_MAIN_VARIANT (type), url);
            }
        }
      else
        return MIN (32, computed);
    }
  return computed;
}
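/* Illustrative consequence of the MIN (32, computed) rule above (assuming
   ia32 without -malign-double): in "struct { int i; double d; }" the
   double member is aligned to 32 bits instead of 64, giving the struct a
   size of 12 bytes rather than 16.  */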
/* Print call to TARGET to FILE.  */

static void
x86_print_call_or_nop (FILE *file, const char *target)
{
  if (flag_nop_mcount || !strcmp (target, "nop"))
    /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
    fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
  else
    fprintf (file, "1:\tcall\t%s\n", target);
}
static bool
current_fentry_name (const char **name)
{
  tree attr = lookup_attribute ("fentry_name",
                                DECL_ATTRIBUTES (current_function_decl));
  if (!attr)
    return false;
  *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
  return true;
}

static bool
current_fentry_section (const char **name)
{
  tree attr = lookup_attribute ("fentry_section",
                                DECL_ATTRIBUTES (current_function_decl));
  if (!attr)
    return false;
  *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
  return true;
}
/* Return a caller-saved register which isn't live or a callee-saved
   register which has been saved on stack in the prologue at entry for
   profile.  */

static int
x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
{
  /* Use %r10 if the profiler is emitted before the prologue or it isn't
     used by DRAP.  */
  if (ix86_profile_before_prologue ()
      || !crtl->drap_reg
      || REGNO (crtl->drap_reg) != R10_REG)
    return R10_REG;

  /* The profiler is emitted after the prologue.  If there is a
     caller-saved register which isn't live or a callee-saved
     register saved on stack in the prologue, use it.  */
  bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  unsigned int i;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i)
        && i != R10_REG
#ifdef NO_PROFILE_COUNTERS
        && (r11_ok || i != R11_REG)
#else
        && i != R11_REG
#endif
        && TEST_HARD_REG_BIT (accessible_reg_set, i)
        && (ix86_save_reg (i, true, true)
            || (call_used_regs[i]
                && !fixed_regs[i]
                && !REGNO_REG_SET_P (reg_live, i))))
      return i;

  sorry ("no register available for profiling %<-mcmodel=large%s%>",
         ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");

  return R10_REG;
}
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */

void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (cfun->machine->insn_queued_at_entrance)
    {
      if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
        fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
      unsigned int patch_area_size
        = crtl->patch_area_size - crtl->patch_area_entry;
      if (patch_area_size)
        ix86_output_patchable_area (patch_area_size,
                                    crtl->patch_area_entry == 0);
    }

  const char *mcount_name = MCOUNT_NAME;

  if (current_fentry_name (&mcount_name))
    ;
  else if (fentry_name)
    mcount_name = fentry_name;
  else if (flag_fentry)
    mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      if (ASSEMBLER_DIALECT == ASM_INTEL)
        fprintf (file, "\tlea\tr11, %sP%d[rip]\n", LPREFIX, labelno);
      else
        fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
#endif

      int scratch;
      const char *reg;
      char legacy_reg[4] = { 0 };

      if (!TARGET_PECOFF)
        {
          switch (ix86_cmodel)
            {
            case CM_LARGE:
              scratch = x86_64_select_profile_regnum (true);
              reg = hi_reg_name[scratch];
              if (LEGACY_INT_REGNO_P (scratch))
                {
                  legacy_reg[0] = 'r';
                  legacy_reg[1] = reg[0];
                  legacy_reg[2] = reg[1];
                  reg = legacy_reg;
                }
              if (ASSEMBLER_DIALECT == ASM_INTEL)
                fprintf (file, "1:\tmovabs\t%s, OFFSET FLAT:%s\n"
                               "\tcall\t%s\n", reg, mcount_name, reg);
              else
                fprintf (file, "1:\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n",
                         mcount_name, reg, reg);
              break;
            case CM_LARGE_PIC:
#ifdef NO_PROFILE_COUNTERS
              scratch = x86_64_select_profile_regnum (false);
              reg = hi_reg_name[scratch];
              if (LEGACY_INT_REGNO_P (scratch))
                {
                  legacy_reg[0] = 'r';
                  legacy_reg[1] = reg[0];
                  legacy_reg[2] = reg[1];
                  reg = legacy_reg;
                }
              if (ASSEMBLER_DIALECT == ASM_INTEL)
                {
                  fprintf (file, "1:movabs\tr11, "
                                 "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
                  fprintf (file, "\tlea\t%s, 1b[rip]\n", reg);
                  fprintf (file, "\tadd\t%s, r11\n", reg);
                  fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
                           mcount_name);
                  fprintf (file, "\tadd\t%s, r11\n", reg);
                  fprintf (file, "\tcall\t%s\n", reg);
                }
              else
                {
                  fprintf (file,
                           "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
                  fprintf (file, "\tleaq\t1b(%%rip), %%%s\n", reg);
                  fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
                  fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n",
                           mcount_name);
                  fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
                  fprintf (file, "\tcall\t*%%%s\n", reg);
                }
#else
              sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
#endif
              break;
            case CM_SMALL_PIC:
            case CM_MEDIUM_PIC:
              if (!ix86_direct_extern_access)
                {
                  if (ASSEMBLER_DIALECT == ASM_INTEL)
                    fprintf (file, "1:\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
                             mcount_name);
                  else
                    fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n",
                             mcount_name);
                  break;
                }
              /* fall through */
            default:
              x86_print_call_or_nop (file, mcount_name);
              break;
            }
        }
      else
        x86_print_call_or_nop (file, mcount_name);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      if (ASSEMBLER_DIALECT == ASM_INTEL)
        fprintf (file,
                 "\tlea\t" PROFILE_COUNT_REGISTER ", %sP%d@GOTOFF[ebx]\n",
                 LPREFIX, labelno);
      else
        fprintf (file,
                 "\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n",
                 LPREFIX, labelno);
#endif
      if (ASSEMBLER_DIALECT == ASM_INTEL)
        fprintf (file, "1:\tcall\t[DWORD PTR %s@GOT[ebx]]\n", mcount_name);
      else
        fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      if (ASSEMBLER_DIALECT == ASM_INTEL)
        fprintf (file,
                 "\tmov\t" PROFILE_COUNT_REGISTER ", OFFSET FLAT:%sP%d\n",
                 LPREFIX, labelno);
      else
        fprintf (file, "\tmovl\t$%sP%d, %%" PROFILE_COUNT_REGISTER "\n",
                 LPREFIX, labelno);
#endif
      x86_print_call_or_nop (file, mcount_name);
    }

  if (flag_record_mcount
      || lookup_attribute ("fentry_section",
                           DECL_ATTRIBUTES (current_function_decl)))
    {
      const char *sname = "__mcount_loc";

      if (current_fentry_section (&sname))
        ;
      else if (fentry_section)
        sname = fentry_section;

      fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
      fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
      fprintf (file, "\t.previous\n");
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

int
ix86_min_insn_size (rtx_insn *insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
        {
        case TYPE_MULTI:
          if (GET_CODE (PATTERN (insn)) == ASM_INPUT
              || asm_noperands (PATTERN (insn)) >= 0)
            return 0;
          break;
        case TYPE_OTHER:
        case TYPE_FCMP:
          break;
        default:
          /* Otherwise trust get_attr_length.  */
          return len;
        }

      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
        l = 4;
    }
  if (l)
    return 1 + l;
  else
    return 2;
}
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx_insn *insn, *start = get_insns ();
  int nbytes = 0, njumps = 0;
  bool isjump = false;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).

     Don't consider asm goto as jump, while it can contain a jump, it doesn't
     have to, control transfer to label(s) can be performed through other
     means, and also we estimate minimum length of all asm stmts as 0.  */
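  /* Worked example of the window arithmetic above (illustrative): if the
     interval START..INSN holds NBYTES == 10 and INSN itself is 2 bytes,
     INSN can begin no earlier than offset 10 - 2 == 8 in the 16-byte
     window, so padding with maxskip 15 - 10 + 2 == 7 guarantees that a
     fourth jump is pushed into the next window.  */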
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
        {
          align_flags alignment = label_to_alignment (insn);
          int align = alignment.levels[0].log;
          int max_skip = alignment.levels[0].maxskip;

          if (max_skip > 15)
            max_skip = 15;
          /* If align > 3, only up to 16 - max_skip - 1 bytes can be
             already in the current 16 byte page, because otherwise
             ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
             bytes to reach 16 byte boundary.  */
          if (align <= 0
              || (align <= 3 && max_skip != (1 << align) - 1))
            max_skip = 0;
          if (dump_file)
            fprintf (dump_file, "Label %i with max_skip %i\n",
                     INSN_UID (insn), max_skip);
          if (max_skip)
            {
              while (nbytes + max_skip >= 16)
                {
                  start = NEXT_INSN (start);
                  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
                      || CALL_P (start))
                    njumps--, isjump = true;
                  else
                    isjump = false;
                  nbytes -= ix86_min_insn_size (start);
                }
            }
          continue;
        }

      min_size = ix86_min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
        fprintf (dump_file, "Insn %i estimated to %i bytes\n",
                 INSN_UID (insn), min_size);
      if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
          || CALL_P (insn))
        njumps++;
      else
        continue;

      while (njumps > 3)
        {
          start = NEXT_INSN (start);
          if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
              || CALL_P (start))
            njumps--, isjump = true;
          else
            isjump = false;
          nbytes -= ix86_min_insn_size (start);
        }
      gcc_assert (njumps >= 0);
      if (dump_file)
        fprintf (dump_file, "Interval %i to %i has %i bytes\n",
                 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
        {
          int padsize = 15 - nbytes + ix86_min_insn_size (insn);

          if (dump_file)
            fprintf (dump_file, "Padding insn %i by %i bytes!\n",
                     INSN_UID (insn), padsize);
          emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
        }
    }
}
#endif
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      basic_block bb = e->src;
      rtx_insn *ret = BB_END (bb);
      rtx_insn *prev;
      bool replace = false;

      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
          || optimize_bb_for_size_p (bb))
        continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
        if (active_insn_p (prev) || LABEL_P (prev))
          break;
      if (prev && LABEL_P (prev))
        {
          edge e;
          edge_iterator ei;

          FOR_EACH_EDGE (e, ei, bb->preds)
            if (EDGE_FREQUENCY (e) && e->src->index >= 0
                && !(e->flags & EDGE_FALLTHRU))
              {
                replace = true;
                break;
              }
        }
      if (!replace)
        {
          prev = prev_active_insn (ret);
          if (prev
              && ((JUMP_P (prev) && any_condjump_p (prev))
                  || CALL_P (prev)))
            replace = true;
          /* Empty functions get branch mispredict even when
             the jump destination is not visible to us.  */
          if (!prev && !optimize_function_for_size_p (cfun))
            replace = true;
        }
      if (replace)
        {
          emit_jump_insn_before (gen_simple_return_internal_long (), ret);
          delete_insn (ret);
        }
    }
}
/* Count the minimum number of instructions in BB.  Return 4 if the
   number of instructions >= 4.  */

static int
ix86_count_insn_bb (basic_block bb)
{
  rtx_insn *insn;
  int insn_count = 0;

  /* Count number of instructions in this block.  Return 4 if the number
     of instructions >= 4.  */
  FOR_BB_INSNS (bb, insn)
    {
      /* Only happen in exit blocks.  */
      if (JUMP_P (insn)
          && ANY_RETURN_P (PATTERN (insn)))
        break;

      if (NONDEBUG_INSN_P (insn)
          && GET_CODE (PATTERN (insn)) != USE
          && GET_CODE (PATTERN (insn)) != CLOBBER)
        {
          insn_count++;
          if (insn_count >= 4)
            return insn_count;
        }
    }

  return insn_count;
}
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
        {
          min_prev_count = 0;
          break;
        }
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
        {
          if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
            {
              int count = ix86_count_insn_bb (e->src);
              if (count < min_prev_count)
                min_prev_count = count;
              break;
            }
        }
    }

  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
/* Pad short function to 4 instructions.  */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
        {
          int insn_count = ix86_count_insn (e->src);

          /* Pad short function.  */
          if (insn_count < 4)
            {
              rtx_insn *insn = ret;

              /* Find epilogue.  */
              while (insn
                     && (!NOTE_P (insn)
                         || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
                insn = PREV_INSN (insn);

              if (!insn)
                insn = ret;

              /* Two NOPs count as one instruction.  */
              insn_count = 2 * (4 - insn_count);
              emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
            }
        }
    }
}
/* Fix up a Windows system unwinder issue.  If an EH region falls through into
   the epilogue, the Windows system unwinder will apply epilogue logic and
   produce incorrect offsets.  This can be avoided by adding a nop between
   the last insn that can throw and the first insn of the epilogue.  */

static void
ix86_seh_fixup_eh_fallthru (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *insn, *next;

      /* Find the beginning of the epilogue.  */
      for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
        if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
          break;
      if (insn == NULL)
        continue;

      /* We only care about preceding insns that can throw.  */
      insn = prev_active_insn (insn);
      if (insn == NULL || !can_throw_internal (insn))
        continue;

      /* Do not separate calls from their debug information.  */
      for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
        if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
          insn = next;
        else
          break;

      emit_insn_after (gen_nops (const1_rtx), insn);
    }
}
/* Split vector load from parm_decl to elemental loads to avoid STLF
   stalls.  */
static void
ix86_split_stlf_stall_load ()
{
  rtx_insn *insn, *start = get_insns ();
  unsigned window = 0;

  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
        continue;
      window++;
      /* Insert 64 vaddps %xmm18, %xmm19, %xmm20(no dependence between each
         other, just emulate for pipeline) before stalled load, stlf stall
         case is as fast as no stall cases on CLX.
         Since CFG is freed before machine_reorg, just do a rough
         calculation of the window according to the layout.  */
      if (window > (unsigned) x86_stlf_window_ninsns)
        return;

      if (any_uncondjump_p (insn)
          || ANY_RETURN_P (PATTERN (insn))
          || CALL_P (insn))
        return;

      rtx set = single_set (insn);
      if (!set)
        continue;
      rtx src = SET_SRC (set);
      if (!MEM_P (src)
          /* Only handle V2DFmode load since it doesn't need any scratch
             register.  */
          || GET_MODE (src) != E_V2DFmode
          || !MEM_EXPR (src)
          || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL)
        continue;

      rtx zero = CONST0_RTX (V2DFmode);
      rtx dest = SET_DEST (set);
      rtx m = adjust_address (src, DFmode, 0);
      rtx loadlpd = gen_sse2_loadlpd (dest, zero, m);
      emit_insn_before (loadlpd, insn);
      m = adjust_address (src, DFmode, 8);
      rtx loadhpd = gen_sse2_loadhpd (dest, dest, m);
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fputs ("Due to potential STLF stall, split instruction:\n",
                 dump_file);
          print_rtl_single (dump_file, insn);
          fputs ("To:\n", dump_file);
          print_rtl_single (dump_file, loadlpd);
          print_rtl_single (dump_file, loadhpd);
        }
      PATTERN (insn) = loadhpd;
      INSN_CODE (insn) = -1;
      gcc_assert (recog_memoized (insn) != -1);
    }
}
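/* Illustratively (AT&T syntax), the split above turns a stalled

       movapd  16(%rsp), %xmm0

   into

       movsd   16(%rsp), %xmm0
       movhpd  24(%rsp), %xmm0

   so each 8-byte element load can forward from a matching 8-byte store of
   the incoming argument.  */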
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  if (TARGET_SEH && current_function_has_exception_handlers ())
    ix86_seh_fixup_eh_fallthru ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_SSE2)
        ix86_split_stlf_stall_load ();
      if (TARGET_PAD_SHORT_FUNCTION)
        ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
        ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
      if (TARGET_FOUR_JUMP_LIMIT)
        ix86_avoid_jump_mispredicts ();
#endif
    }
}
/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx_insn *insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (GENERAL_REG_P (recog_data.operand[i])
        && !QI_REGNO_P (REGNO (recog_data.operand[i])))
      return true;
  return false;
}
/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn,
                   NONCONST)
    {
      const_rtx x = *iter;
      if (REG_P (x)
          && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))
              || REX2_INT_REGNO_P (REGNO (x))))
        return true;
    }
  return false;
}
/* Return true when INSN mentions register that must be encoded using REX2
   prefix.  */
bool
x86_extended_rex2reg_mentioned_p (rtx insn)
{
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn,
                   NONCONST)
    {
      const_rtx x = *iter;
      if (REG_P (x) && REX2_INT_REGNO_P (REGNO (x)))
        return true;
    }
  return false;
}
/* Return true when the rtx OPERANDS mention a register that must be encoded
   using the EVEX prefix.  */
bool
x86_evex_reg_mentioned_p (rtx operands[], int nops)
{
  int i;
  for (i = 0; i < nops; i++)
    if (EXT_REX_SSE_REG_P (operands[i])
        || x86_extended_rex2reg_mentioned_p (operands[i]))
      return true;
  return false;
}
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */
bool
x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case E_DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      mode = SImode;
      break;

    case E_SImode:
    case E_HImode:
    case E_QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
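  /* Example: for val == -4 the constant becomes 4 and the caller can emit
     "subl $4, %eax" instead of "addl $-4, %eax"; for val == 128 the
     constant becomes -128, which still fits a sign-extended 8-bit
     immediate while +128 would not.  */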
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */
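/* Worked example (illustrative): converting 0xFFFFFFFF (4294967295) from
   SImode.  The value tests negative as a signed int, so the slow path
   below computes i0 = (in >> 1) | (in & 1) = 0x7FFFFFFF, converts that
   exactly as a positive signed value, and then doubles the result with
   out = f0 + f0; ORing in the low bit keeps the final rounding correct.  */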
void
x86_emit_floatuns (rtx operands[2])
{
  rtx_code_label *neglab, *donelab;
  rtx i0, i1, f0, in, out;
  machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
                            1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
                            1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */

static const char *
ix86_invalid_conversion (const_tree fromtype, const_tree totype)
{
  machine_mode from_mode = element_mode (fromtype);
  machine_mode to_mode = element_mode (totype);

  if (!TARGET_SSE2 && from_mode != to_mode)
    {
      /* Do not allow conversions to/from BFmode/HFmode scalar types
         when TARGET_SSE2 is not available.  */
      if (from_mode == BFmode)
        return N_("invalid conversion from type %<__bf16%> "
                  "without option %<-msse2%>");
      if (from_mode == HFmode)
        return N_("invalid conversion from type %<_Float16%> "
                  "without option %<-msse2%>");
      if (to_mode == BFmode)
        return N_("invalid conversion to type %<__bf16%> "
                  "without option %<-msse2%>");
      if (to_mode == HFmode)
        return N_("invalid conversion to type %<_Float16%> "
                  "without option %<-msse2%>");
    }

  /* Warn for silent implicit conversion between __bf16 and short,
     since __bfloat16 is refined as real __bf16 instead of short
     since GCC 13.  */
  if (element_mode (fromtype) != element_mode (totype)
      && (TARGET_AVX512BF16 || TARGET_AVXNECONVERT))
    {
      /* Warn for silent implicit conversion where user may expect
         a bitcast.  */
      if ((TYPE_MODE (fromtype) == BFmode
           && TYPE_MODE (totype) == HImode)
          || (TYPE_MODE (totype) == BFmode
              && TYPE_MODE (fromtype) == HImode))
        warning (0, "%<__bfloat16%> is redefined from typedef %<short%> "
                 "to real %<__bf16%> since GCC 13.1, be careful of "
                 "implicit conversion between %<__bf16%> and %<short%>; "
                 "an explicit bitcast may be needed here");
    }

  /* Conversion allowed.  */
  return NULL;
}
/* Return the diagnostic message string if the unary operation OP is
   not permitted on TYPE, NULL otherwise.  */

static const char *
ix86_invalid_unary_op (int op, const_tree type)
{
  machine_mode mmode = element_mode (type);
  /* Reject all single-operand operations on BFmode/HFmode except for &
     when TARGET_SSE2 is not available.  */
  if (!TARGET_SSE2 && op != ADDR_EXPR)
    {
      if (mmode == BFmode)
        return N_("operation not permitted on type %<__bf16%> "
                  "without option %<-msse2%>");
      if (mmode == HFmode)
        return N_("operation not permitted on type %<_Float16%> "
                  "without option %<-msse2%>");
    }

  /* Operation allowed.  */
  return NULL;
}
/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */

static const char *
ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
                        const_tree type2)
{
  machine_mode type1_mode = element_mode (type1);
  machine_mode type2_mode = element_mode (type2);
  /* Reject all 2-operand operations on BFmode or HFmode
     when TARGET_SSE2 is not available.  */
  if (!TARGET_SSE2)
    {
      if (type1_mode == BFmode || type2_mode == BFmode)
        return N_("operation not permitted on type %<__bf16%> "
                  "without option %<-msse2%>");

      if (type1_mode == HFmode || type2_mode == HFmode)
        return N_("operation not permitted on type %<_Float16%> "
                  "without option %<-msse2%>");
    }

  /* Operation allowed.  */
  return NULL;
}
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (scalar_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (mode == TFmode)
    return true;
  else if (mode == HFmode || mode == BFmode)
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
   if MODE is HFmode, and punt to the generic implementation otherwise.  */

static bool
ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
{
  /* NB: Always return TRUE for HFmode so that the _Float16 type will
     be defined by the C front-end for AVX512FP16 intrinsics.  We will
     issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
     enabled.  */
  return ((mode == HFmode || mode == BFmode)
          ? true
          : default_libgcc_floating_mode_supported_p (mode));
}
/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (machine_mode mode)
{
  /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
     either.  */
  if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode)
    return false;
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
    return true;
  if (TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
    return true;
  if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
      && VALID_MMX_REG_MODE (mode))
    return true;
  if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
      && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  if (mode == V2QImode)
    return true;
  return false;
}
/* Target hook for c_mode_for_suffix.  */
static machine_mode
ix86_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}
/* Helper function to map common constraints to non-EGPR ones.
   All related constraints have h prefix, and h plus Upper letter
   means the constraint is strictly EGPR enabled, while h plus
   lower letter indicates the constraint is strictly gpr16 only.

   Specially for "g" constraint, split it to rmi as there is
   no corresponding general constraint define for backend.

   Here is the full list to map constraints that may involve
   GPR to the prefixed forms:

   "g" -> "jrjmi"
   "r" -> "jr"
   "m" -> "jm"
   "<" -> "j<"
   ">" -> "j>"
   "o" -> "jo"
   "V" -> "jV"
   "p" -> "jp"
   "Bm" -> "ja"  */
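/* Illustrative example of the rewrite performed below: under APX, with
   gpr32 use in inline asm disabled, a user constraint string "=rm" is
   rewritten to "=jrjm", and a plain "g" expands to "jrjmi".  */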
static void map_egpr_constraints (vec<const char *> &constraints)
{
  for (size_t i = 0; i < constraints.length(); i++)
    {
      const char *cur = constraints[i];

      if (startswith (cur, "=@cc"))
        continue;

      int len = strlen (cur);
      auto_vec<char> buf;

      for (int j = 0; j < len; j++)
        {
          switch (cur[j])
            {
            case 'g':
              buf.safe_push ('j');
              buf.safe_push ('r');
              buf.safe_push ('j');
              buf.safe_push ('m');
              buf.safe_push ('i');
              break;
            case 'r':
            case 'm':
            case '<':
            case '>':
            case 'o':
            case 'V':
            case 'p':
              buf.safe_push ('j');
              buf.safe_push (cur[j]);
              break;
            case 'B':
              if (cur[j + 1] == 'm')
                {
                  buf.safe_push ('j');
                  buf.safe_push ('a');
                  j++;
                }
              else
                {
                  buf.safe_push (cur[j]);
                  buf.safe_push (cur[j + 1]);
                  j++;
                }
              break;
            case 'T':
            case 'Y':
            case 'W':
            case 'j':
              buf.safe_push (cur[j]);
              buf.safe_push (cur[j + 1]);
              j++;
              break;
            default:
              buf.safe_push (cur[j]);
              break;
            }
        }
      buf.safe_push ('\0');
      constraints[i] = xstrdup (buf.address ());
    }
}
/* Worker function for TARGET_MD_ASM_ADJUST.

   We implement asm flag outputs, and maintain source compatibility
   with the old cc0-based compiler.  */
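/* Illustrative user-level example of the asm flag outputs handled here
   (this is documented GCC extended asm syntax, not code from this file):

       int bit_set;
       asm ("btl %2, %1" : "=@ccc" (bit_set) : "r" (word), "r" (bit));

   The "=@ccc" constraint asks for the carry flag; the loop below rewrites
   it into a compare of the flags register against zero in CCCmode and a
   set of the output from that comparison.  */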
static rtx_insn *
ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
                    vec<machine_mode> & /*input_modes*/,
                    vec<const char *> &constraints, vec<rtx> &/*uses*/,
                    vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
                    location_t loc)
{
  bool saw_asm_flag = false;

  start_sequence ();

  if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
    map_egpr_constraints (constraints);

  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
    {
      const char *con = constraints[i];
      if (!startswith (con, "=@cc"))
        continue;
      con += 4;
      if (strchr (con, ',') != NULL)
        {
          error_at (loc, "alternatives not allowed in %<asm%> flag output");
          continue;
        }

      bool invert = false;
      if (con[0] == 'n')
        invert = true, con++;

      machine_mode mode = CCmode;
      rtx_code code = UNKNOWN;

      switch (con[0])
        {
        case 'a':
          if (con[1] == 0)
            mode = CCAmode, code = EQ;
          else if (con[1] == 'e' && con[2] == 0)
            mode = CCCmode, code = NE;
          break;
        case 'b':
          if (con[1] == 0)
            mode = CCCmode, code = EQ;
          else if (con[1] == 'e' && con[2] == 0)
            mode = CCAmode, code = NE;
          break;
        case 'c':
          if (con[1] == 0)
            mode = CCCmode, code = EQ;
          break;
        case 'e':
          if (con[1] == 0)
            mode = CCZmode, code = EQ;
          break;
        case 'g':
          if (con[1] == 0)
            mode = CCGCmode, code = GT;
          else if (con[1] == 'e' && con[2] == 0)
            mode = CCGCmode, code = GE;
          break;
        case 'l':
          if (con[1] == 0)
            mode = CCGCmode, code = LT;
          else if (con[1] == 'e' && con[2] == 0)
            mode = CCGCmode, code = LE;
          break;
        case 'o':
          if (con[1] == 0)
            mode = CCOmode, code = EQ;
          break;
        case 'p':
          if (con[1] == 0)
            mode = CCPmode, code = EQ;
          break;
        case 's':
          if (con[1] == 0)
            mode = CCSmode, code = EQ;
          break;
        case 'z':
          if (con[1] == 0)
            mode = CCZmode, code = EQ;
          break;
        }
      if (code == UNKNOWN)
        {
          error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
          continue;
        }
      if (invert)
        code = reverse_condition (code);

      rtx dest = outputs[i];
      if (!saw_asm_flag)
        {
          /* This is the first asm flag output.  Here we put the flags
             register in as the real output and adjust the condition to
             allow it.  */
          constraints[i] = "=Bf";
          outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
          saw_asm_flag = true;
        }
      else
        {
          /* We don't need the flags register as output twice.  */
          constraints[i] = "=X";
          outputs[i] = gen_rtx_SCRATCH (SImode);
        }

      rtx x = gen_rtx_REG (mode, FLAGS_REG);
      x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);

      machine_mode dest_mode = GET_MODE (dest);
      if (!SCALAR_INT_MODE_P (dest_mode))
        {
          error_at (loc, "invalid type for %<asm%> flag output");
          continue;
        }

      if (dest_mode == QImode)
        emit_insn (gen_rtx_SET (dest, x));
      else
        {
          rtx reg = gen_reg_rtx (QImode);
          emit_insn (gen_rtx_SET (reg, x));

          reg = convert_to_mode (dest_mode, reg, 1);
          emit_move_insn (dest, reg);
        }
    }

  rtx_insn *seq = get_insns ();
  end_sequence ();

  if (saw_asm_flag)
    return seq;
  else
    {
      /* If we had no asm flag outputs, clobber the flags.  */
      clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
      SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
      return NULL;
    }
}
/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, machine_mode mode)
{
  return (mode == CCFPmode
          ? reverse_condition_maybe_unordered (code)
          : reverse_condition (code));
}
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx_insn *insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
          && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        {
          if (REGNO (operands[0]) == FIRST_STACK_REG)
            return output_387_ffreep (operands, 0);
          return "fstp\t%y0";
        }
      if (STACK_TOP_P (operands[0]))
        return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        return "fstp%Z0\t%y0";
      else
        {
          /* There is no non-popping store to memory for XFmode.
             So if we need one, follow the store with a load.  */
          if (GET_MODE (operands[0]) == XFmode)
            return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
          else
            return "fst%Z0\t%y0";
        }
    }
  else
    return "fld%z1\t%y1";
}
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#ifndef USE_GAS
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
     SPARC assembler.  One cannot mix single-letter flags and #exclude, so
     only emit the latter here.  */
  if (flags & SECTION_EXCLUDE)
    {
      fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  if (type == float128_type_node || type == float64x_type_node)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case E_HFmode:
      /* _Float16 is "DF16_".
         Align with clang's decision in https://reviews.llvm.org/D33719.  */
      return "DF16_";
    case E_TFmode:
      /* __float128 is "g".  */
      return "g";
    case E_XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}
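/* For reference (illustrative, standard Itanium C++ ABI manglings): with
   the strings above, "void f(__float128);" mangles as _Z1fg,
   "void f(_Float16);" as _Z1fDF16_, and "void f(long double);" as
   _Z1fe.  */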
/* Create C++ tinfo symbols for only conditionally available fundamental
   types.  */

static void
ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
{
  extern tree ix86_float16_type_node;
  extern tree ix86_bf16_type_node;

  if (!TARGET_SSE2)
    {
      if (!float16_type_node)
        float16_type_node = ix86_float16_type_node;
      if (!bfloat16_type_node)
        bfloat16_type_node = ix86_bf16_type_node;
      callback (float16_type_node);
      callback (bfloat16_type_node);
      float16_type_node = NULL_TREE;
      bfloat16_type_node = NULL_TREE;
    }
}
static GTY(()) tree ix86_tls_stack_chk_guard_decl;

static tree
ix86_stack_protect_guard (void)
{
  if (TARGET_SSP_TLS_GUARD)
    {
      tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
      int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
      tree type = build_qualified_type (type_node, qual);
      tree t;

      if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
        {
          t = ix86_tls_stack_chk_guard_decl;

          if (t == NULL)
            {
              rtx x;

              t = build_decl
                (UNKNOWN_LOCATION, VAR_DECL,
                 get_identifier (ix86_stack_protector_guard_symbol_str),
                 type);
              TREE_STATIC (t) = 1;
              TREE_PUBLIC (t) = 1;
              DECL_EXTERNAL (t) = 1;
              TREE_USED (t) = 1;
              TREE_THIS_VOLATILE (t) = 1;
              DECL_ARTIFICIAL (t) = 1;
              DECL_IGNORED_P (t) = 1;

              /* Do not share RTL as the declaration is visible outside of
                 current function.  */
              x = DECL_RTL (t);
              RTX_FLAG (x, used) = 1;

              ix86_tls_stack_chk_guard_decl = t;
            }
        }
      else
        {
          tree asptrtype = build_pointer_type (type);

          t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
          t = build2 (MEM_REF, asptrtype, t,
                      build_int_cst (asptrtype, 0));
          TREE_THIS_VOLATILE (t) = 1;
        }

      return t;
    }

  return default_stack_protect_guard ();
}
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
         ? default_external_stack_protect_fail ()
         : default_hidden_stack_protect_fail ();
}
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */

int
asm_preferred_eh_data_format (int code, int global)
{
  /* PE-COFF is effectively always -fPIC because of the .reloc section.  */
  if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access)
    {
      int type = DW_EH_PE_sdata8;
      if (ptr_mode == SImode
          || ix86_cmodel == CM_SMALL_PIC
          || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
        type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }

  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;

  return DW_EH_PE_absptr;
}
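/* Worked example of the selection above (illustrative): compiling with
   -fPIC on x86-64 with the default small code model, CM_SMALL_PIC applies,
   so local references encode as DW_EH_PE_pcrel | DW_EH_PE_sdata4 and
   references to global symbols additionally get DW_EH_PE_indirect.  */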
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                 tree vectype, int misalign)
{
  bool fp = false;
  machine_mode mode = TImode;
  int index;

  if (vectype != NULL)
    {
      fp = FLOAT_TYPE_P (vectype);
      mode = TYPE_MODE (vectype);
    }

  switch (type_of_cost)
    {
    case scalar_stmt:
      return fp ? ix86_cost->addss : COSTS_N_INSNS (1);

    case scalar_load:
      /* load/store costs are relative to register move which is 2.  Recompute
         it to COSTS_N_INSNS so everything have same base.  */
      return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
                            : ix86_cost->int_load[2]) / 2;

    case scalar_store:
      return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
                            : ix86_cost->int_store[2]) / 2;

    case vector_stmt:
      return ix86_vec_cost (mode,
                            fp ? ix86_cost->addss : ix86_cost->sse_op);

    case vector_load:
      index = sse_store_index (mode);
      /* See PR82713 - we may end up being called on non-vector type.  */
      if (index < 0)
        index = 2;
      return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;

    case vector_store:
      index = sse_store_index (mode);
      /* See PR82713 - we may end up being called on non-vector type.  */
      if (index < 0)
        index = 2;
      return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;

    case vec_to_scalar:
    case scalar_to_vec:
      return ix86_vec_cost (mode, ix86_cost->sse_op);

    /* We should have separate costs for unaligned loads and gather/scatter.
       Do that incrementally.  */
    case unaligned_load:
      index = sse_store_index (mode);
      /* See PR82713 - we may end up being called on non-vector type.  */
      if (index < 0)
        index = 2;
      return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;

    case unaligned_store:
      index = sse_store_index (mode);
      /* See PR82713 - we may end up being called on non-vector type.  */
      if (index < 0)
        index = 2;
      return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;

    case vector_gather_load:
      return ix86_vec_cost (mode,
                            COSTS_N_INSNS
                              (ix86_cost->gather_static
                               + ix86_cost->gather_per_elt
                                 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);

    case vector_scatter_store:
      return ix86_vec_cost (mode,
                            COSTS_N_INSNS
                              (ix86_cost->scatter_static
                               + ix86_cost->scatter_per_elt
                                 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);

    case cond_branch_taken:
      return ix86_cost->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return ix86_cost->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return ix86_vec_cost (mode, ix86_cost->sse_op);

    case vec_construct:
      {
        int n = TYPE_VECTOR_SUBPARTS (vectype);
        /* N - 1 element inserts into an SSE vector, the possible
           GPR -> XMM move is accounted for in add_stmt_cost.  */
        if (GET_MODE_BITSIZE (mode) <= 128)
          return (n - 1) * ix86_cost->sse_op;
        /* One vinserti128 for combining two SSE vectors for AVX256.  */
        else if (GET_MODE_BITSIZE (mode) == 256)
          return ((n - 2) * ix86_cost->sse_op
                  + ix86_vec_cost (mode, ix86_cost->addss));
        /* One vinserti64x4 and two vinserti128 for combining SSE
           and AVX256 vectors to AVX512.  */
        else if (GET_MODE_BITSIZE (mode) == 512)
          return ((n - 4) * ix86_cost->sse_op
                  + 3 * ix86_vec_cost (mode, ix86_cost->addss));
        gcc_unreachable ();
      }

    default:
      gcc_unreachable ();
    }
}
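/* Worked example for the vec_construct case above (illustrative):
   building a V8SImode vector (256 bits, n == 8) from scalars is costed as
   (8 - 2) * sse_op for the element inserts plus one
   ix86_vec_cost (mode, addss)-weighted insert for the vinserti128 that
   combines the two 128-bit halves.  */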
/* This function returns the calling abi specific va_list type node.
   It returns the FNDECL specific va_list type.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  if (TARGET_64BIT)
    {
      if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
        return ms_va_list_type_node;

      if ((TREE_CODE (type) == ARRAY_TYPE
           && integer_zerop (array_type_nelts (type)))
          || POINTER_TYPE_P (type))
        {
          tree elem_type = TREE_TYPE (type);
          if (TREE_CODE (elem_type) == RECORD_TYPE
              && lookup_attribute ("sysv_abi va_list",
                                   TYPE_ATTRIBUTES (elem_type)))
            return sysv_va_list_type_node;
        }

      return NULL_TREE;
    }

  return std_canonical_va_list_type (type);
}
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal type.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (TARGET_64BIT)
    {
      switch (idx)
        {
        default:
          break;

        case 0:
          *ptree = ms_va_list_type_node;
          *pname = "__builtin_ms_va_list";
          return 1;

        case 1:
          *ptree = sysv_va_list_type_node;
          *pname = "__builtin_sysv_va_list";
          return 1;
        }
    }

  return 0;
}
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_atom_sched_reorder
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  ix86_dependencies_evaluation_hook
/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OP.  Arguments type
   is passed in MODE.  */

static int
ix86_reassociation_width (unsigned int op, machine_mode mode)
{
  int width = 1;
  /* Vector part.  */
  if (VECTOR_MODE_P (mode))
    {
      int div = 1;
      if (INTEGRAL_MODE_P (mode))
        width = ix86_cost->reassoc_vec_int;
      else if (FLOAT_MODE_P (mode))
        width = ix86_cost->reassoc_vec_fp;

      if (width == 1)
        return 1;

      /* Integer vector instructions execute in FP unit
         and can execute 3 additions and one multiplication per cycle.  */
      if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
           || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4)
          && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
        return 1;

      /* Account for targets that split wide vectors into multiple parts.  */
      if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256)
        div = GET_MODE_BITSIZE (mode) / 256;
      else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
        div = GET_MODE_BITSIZE (mode) / 128;
      else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
        div = GET_MODE_BITSIZE (mode) / 64;
      width = (width + div - 1) / div;
    }
  /* Scalar part.  */
  else if (INTEGRAL_MODE_P (mode))
    width = ix86_cost->reassoc_int;
  else if (FLOAT_MODE_P (mode))
    width = ix86_cost->reassoc_fp;

  /* Avoid using too many registers in 32bit mode.  */
  if (!TARGET_64BIT && width > 2)
    width = 2;

  return width;
}
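/* Worked example of the scaling above (illustrative): with
   TARGET_AVX256_SPLIT_REGS and V8SFmode (256 bits), div = 256 / 128 = 2,
   so a cost-table width of 4 is scaled to (4 + 2 - 1) / 2 = 2, because
   each 256-bit operation occupies two 128-bit pipes.  */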
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static machine_mode
ix86_preferred_simd_mode (scalar_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case E_QImode:
      if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
        return V64QImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V32QImode;
      else
        return V16QImode;

    case E_HImode:
      if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
        return V32HImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V16HImode;
      else
        return V8HImode;

    case E_SImode:
      if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
        return V16SImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SImode;
      else
        return V4SImode;

    case E_DImode:
      if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
        return V8DImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DImode;
      else
        return V2DImode;

    case E_HFmode:
      if (TARGET_AVX512FP16)
        {
          if (TARGET_AVX512VL)
            {
              if (TARGET_PREFER_AVX128)
                return V8HFmode;
              else if (TARGET_PREFER_AVX256 || !TARGET_EVEX512)
                return V16HFmode;
            }
          if (TARGET_EVEX512)
            return V32HFmode;
        }
      return word_mode;

    case E_SFmode:
      if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
        return V16SFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SFmode;
      else
        return V4SFmode;

    case E_DFmode:
      if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
        return V8DFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DFmode;
      else if (TARGET_SSE2)
        return V2DFmode;
      return word_mode;

    default:
      return word_mode;
    }
}
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  If AVX512F is enabled then try vectorizing with 512bit,
   256bit and 128bit vectors.  */

static unsigned int
ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
{
  if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
    {
      modes->safe_push (V64QImode);
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
    }
  else if (TARGET_AVX512F && TARGET_EVEX512 && all)
    {
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
      modes->safe_push (V64QImode);
    }
  else if (TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
    }
  else if (TARGET_AVX && all)
    {
      modes->safe_push (V16QImode);
      modes->safe_push (V32QImode);
    }
  else if (TARGET_SSE2)
    modes->safe_push (V16QImode);

  if (TARGET_MMX_WITH_SSE)
    modes->safe_push (V8QImode);

  if (TARGET_SSE2)
    modes->safe_push (V4QImode);

  return 0;
}
/* Implementation of targetm.vectorize.get_mask_mode.  */

static opt_machine_mode
ix86_get_mask_mode (machine_mode data_mode)
{
  unsigned vector_size = GET_MODE_SIZE (data_mode);
  unsigned nunits = GET_MODE_NUNITS (data_mode);
  unsigned elem_size = vector_size / nunits;

  /* Scalar mask case.  */
  if ((TARGET_AVX512F && TARGET_EVEX512 && vector_size == 64)
      || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
      /* AVX512FP16 only supports vector comparison
         to kmask for _Float16.  */
      || (TARGET_AVX512VL && TARGET_AVX512FP16
          && GET_MODE_INNER (data_mode) == E_HFmode))
    {
      if (elem_size == 4
          || elem_size == 8
          || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
        return smallest_int_mode_for_size (nunits);
    }

  scalar_int_mode elem_mode
    = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);

  gcc_assert (elem_size * nunits == vector_size);

  return mode_for_vector (elem_mode, nunits);
}
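/* Worked example (illustrative): for V16SFmode with AVX512F and EVEX512,
   vector_size is 64, nunits 16 and elem_size 4, so the scalar-mask path
   returns smallest_int_mode_for_size (16) == HImode, i.e. a 16-bit kmask.
   Without AVX512 the fallback builds a V16SImode vector mask instead.  */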
/* Return class of registers which could be used for pseudo of MODE
   and of class RCLASS for spilling instead of memory.  Return NO_REGS
   if it is not possible or non-profitable.  */

/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */

static reg_class_t
ix86_spill_class (reg_class_t rclass, machine_mode mode)
{
  if (0 && TARGET_GENERAL_REGS_SSE_SPILL
      && TARGET_SSE2
      && TARGET_INTER_UNIT_MOVES_TO_VEC
      && TARGET_INTER_UNIT_MOVES_FROM_VEC
      && (mode == SImode || (TARGET_64BIT && mode == DImode))
      && INTEGER_CLASS_P (rclass))
    return ALL_SSE_REGS;
  return NO_REGS;
}
/* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST.  Like the default implementation,
   but returns a lower bound.  */

static unsigned int
ix86_max_noce_ifcvt_seq_cost (edge e)
{
  bool predictable_p = predictable_edge_p (e);
  if (predictable_p)
    {
      if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
        return param_max_rtl_if_conversion_predictable_cost;
    }
  else
    {
      if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
        return param_max_rtl_if_conversion_unpredictable_cost;
    }

  return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
}
/* Return true if SEQ is a good candidate as a replacement for the
   if-convertible sequence described in IF_INFO.  */

static bool
ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
{
  if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
    {
      int cmov_cnt = 0;
      /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
         Maybe we should allow even more conditional moves as long as they
         are used far enough not to stall the CPU, or also consider
         IF_INFO->TEST_BB succ edge probabilities.  */
      for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
        {
          rtx set = single_set (insn);
          if (!set)
            continue;
          if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
            continue;
          rtx src = SET_SRC (set);
          machine_mode mode = GET_MODE (src);
          if (GET_MODE_CLASS (mode) != MODE_INT
              && GET_MODE_CLASS (mode) != MODE_FLOAT)
            continue;
          if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
              || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
            continue;
          /* insn is CMOV or FCMOV.  */
          if (++cmov_cnt > 1)
            return false;
        }
    }
  return default_noce_conversion_profitable_p (seq, if_info);
}
/* x86-specific vector costs.  */
class ix86_vector_costs : public vector_costs
{
public:
  ix86_vector_costs (vec_info *, bool);

  unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
                              stmt_vec_info stmt_info, slp_tree node,
                              tree vectype, int misalign,
                              vect_cost_model_location where) override;
  void finish_cost (const vector_costs *) override;

private:
  /* Estimate register pressure of the vectorized code.  */
  void ix86_vect_estimate_reg_pressure ();
  /* Number of GENERAL_REGS/SSE_REGS used in the vectorizer, it's used for
     estimation of register pressure.
     ??? Currently it's only used by vec_construct/scalar_to_vec
     where we know it's not loaded from memory.  */
  unsigned m_num_gpr_needed[3];
  unsigned m_num_sse_needed[3];
};

ix86_vector_costs::ix86_vector_costs (vec_info *vinfo,
                                      bool costing_for_scalar)
  : vector_costs (vinfo, costing_for_scalar),
    m_num_gpr_needed (),
    m_num_sse_needed ()
{
}
/* Implement targetm.vectorize.create_costs.  */

static vector_costs *
ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
{
  return new ix86_vector_costs (vinfo, costing_for_scalar);
}
24760 ix86_vector_costs::add_stmt_cost (int count
, vect_cost_for_stmt kind
,
24761 stmt_vec_info stmt_info
, slp_tree node
,
24762 tree vectype
, int misalign
,
24763 vect_cost_model_location where
)
24765 unsigned retval
= 0;
24767 = (kind
== scalar_stmt
|| kind
== scalar_load
|| kind
== scalar_store
);
24768 int stmt_cost
= - 1;
24771 machine_mode mode
= scalar_p
? SImode
: TImode
;
24773 if (vectype
!= NULL
)
24775 fp
= FLOAT_TYPE_P (vectype
);
24776 mode
= TYPE_MODE (vectype
);
24778 mode
= TYPE_MODE (TREE_TYPE (vectype
));
24781 if ((kind
== vector_stmt
|| kind
== scalar_stmt
)
24783 && stmt_info
->stmt
&& gimple_code (stmt_info
->stmt
) == GIMPLE_ASSIGN
)
24785 tree_code subcode
= gimple_assign_rhs_code (stmt_info
->stmt
);
24786 /*machine_mode inner_mode = mode;
24787 if (VECTOR_MODE_P (mode))
24788 inner_mode = GET_MODE_INNER (mode);*/
24793 case POINTER_PLUS_EXPR
:
24795 if (kind
== scalar_stmt
)
24797 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode
))
24798 stmt_cost
= ix86_cost
->addss
;
24799 else if (X87_FLOAT_MODE_P (mode
))
24800 stmt_cost
= ix86_cost
->fadd
;
24802 stmt_cost
= ix86_cost
->add
;
24805 stmt_cost
= ix86_vec_cost (mode
, fp
? ix86_cost
->addss
24806 : ix86_cost
->sse_op
);
24810 /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
24811 take it as MULT_EXPR. */
24812 case MULT_HIGHPART_EXPR
:
24813 stmt_cost
= ix86_multiplication_cost (ix86_cost
, mode
);
24815 /* There's no direct instruction for WIDEN_MULT_EXPR,
24816 take emulation into account. */
24817 case WIDEN_MULT_EXPR
:
24818 stmt_cost
= ix86_widen_mult_cost (ix86_cost
, mode
,
24819 TYPE_UNSIGNED (vectype
));
24823 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode
))
24824 stmt_cost
= ix86_cost
->sse_op
;
24825 else if (X87_FLOAT_MODE_P (mode
))
24826 stmt_cost
= ix86_cost
->fchs
;
24827 else if (VECTOR_MODE_P (mode
))
24828 stmt_cost
= ix86_vec_cost (mode
, ix86_cost
->sse_op
);
24830 stmt_cost
= ix86_cost
->add
;
24832 case TRUNC_DIV_EXPR
:
24833 case CEIL_DIV_EXPR
:
24834 case FLOOR_DIV_EXPR
:
24835 case ROUND_DIV_EXPR
:
24836 case TRUNC_MOD_EXPR
:
24837 case CEIL_MOD_EXPR
:
24838 case FLOOR_MOD_EXPR
:
24840 case ROUND_MOD_EXPR
:
24841 case EXACT_DIV_EXPR
:
24842 stmt_cost
= ix86_division_cost (ix86_cost
, mode
);
24850 tree op1
= gimple_assign_rhs1 (stmt_info
->stmt
);
24851 tree op2
= gimple_assign_rhs2 (stmt_info
->stmt
);
24852 stmt_cost
= ix86_shift_rotate_cost
24854 (subcode
== RSHIFT_EXPR
24855 && !TYPE_UNSIGNED (TREE_TYPE (op1
)))
24856 ? ASHIFTRT
: LSHIFTRT
, mode
,
24857 TREE_CODE (op2
) == INTEGER_CST
,
24858 cst_and_fits_in_hwi (op2
)
24859 ? int_cst_value (op2
) : -1,
24860 false, false, NULL
, NULL
);
24864 /* Only sign-conversions are free. */
24865 if (tree_nop_conversion_p
24866 (TREE_TYPE (gimple_assign_lhs (stmt_info
->stmt
)),
24867 TREE_TYPE (gimple_assign_rhs1 (stmt_info
->stmt
))))
24879 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode
))
24880 stmt_cost
= ix86_cost
->sse_op
;
24881 else if (VECTOR_MODE_P (mode
))
24882 stmt_cost
= ix86_vec_cost (mode
, ix86_cost
->sse_op
);
24884 stmt_cost
= ix86_cost
->add
;
24892 if ((kind
== vector_stmt
|| kind
== scalar_stmt
)
24895 && (cfn
= gimple_call_combined_fn (stmt_info
->stmt
)) != CFN_LAST
)
24899 stmt_cost
= ix86_vec_cost (mode
,
24900 mode
== SFmode
? ix86_cost
->fmass
24901 : ix86_cost
->fmasd
);
24904 stmt_cost
= ix86_multiplication_cost (ix86_cost
, mode
);
  /* If we do elementwise loads into a vector then we are bound by
     latency and execution resources for the many scalar loads
     (AGU and load ports).  Try to account for this by scaling the
     construction cost by the number of elements involved.  */
  if ((kind == vec_construct || kind == vec_to_scalar)
      && stmt_info
      && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
          || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
      && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
           && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)))
               != INTEGER_CST))
          || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER))
    {
      stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
      stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
    }
  else if ((kind == vec_construct || kind == scalar_to_vec)
           && node
           && SLP_TREE_DEF_TYPE (node) == vect_external_def)
    {
      stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
      unsigned i;
      tree op;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
        if (TREE_CODE (op) == SSA_NAME)
          TREE_VISITED (op) = 0;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
        {
          if (TREE_CODE (op) != SSA_NAME
              || TREE_VISITED (op))
            continue;
          TREE_VISITED (op) = 1;
          gimple *def = SSA_NAME_DEF_STMT (op);
          tree tem;
          if (is_gimple_assign (def)
              && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
              && ((tem = gimple_assign_rhs1 (def)), true)
              && TREE_CODE (tem) == SSA_NAME
              /* A sign-change expands to nothing.  */
              && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (def)),
                                        TREE_TYPE (tem)))
            def = SSA_NAME_DEF_STMT (tem);
          /* When the component is loaded from memory we can directly
             move it to a vector register, otherwise we have to go
             via a GPR or via vpinsr which involves similar cost.
             Likewise with a BIT_FIELD_REF extracting from a vector
             register we can hope to avoid using a GPR.  */
          if (!is_gimple_assign (def)
              || ((!gimple_assign_load_p (def)
                   || (!TARGET_SSE4_1
                       && GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op))) == 1))
                  && (gimple_assign_rhs_code (def) != BIT_FIELD_REF
                      || !VECTOR_TYPE_P (TREE_TYPE
                            (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))))
            {
              if (fp)
                m_num_sse_needed[where]++;
              else
                {
                  m_num_gpr_needed[where]++;
                  stmt_cost += ix86_cost->sse_to_integer;
                }
            }
        }
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
        if (TREE_CODE (op) == SSA_NAME)
          TREE_VISITED (op) = 0;
    }
  if (stmt_cost == -1)
    stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);

  /* Penalize DFmode vector operations for Bonnell.  */
  if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
      && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
    stmt_cost *= 5;  /* FIXME: The value here is arbitrary.  */

  /* Statements in an inner loop relative to the loop being
     vectorized are weighted more heavily.  The value here is
     arbitrary and could potentially be improved with analysis.  */
  retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);

  /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
     for Silvermont, as it has an out-of-order integer pipeline that can
     execute 2 scalar instructions per tick, but an in-order SIMD
     pipeline.  */
  if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT)
       || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL))
      && stmt_info && stmt_info->stmt)
    {
      tree lhs_op = gimple_get_lhs (stmt_info->stmt);
      if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
        retval = (retval * 17) / 10;
    }

  m_costs[where] += retval;

  return retval;
}
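/* A worked example of the Silvermont-class adjustment above (an
   illustrative sketch, not taken from any benchmark): a vector
   statement with count == 2 and stmt_cost == 4 whose lhs has integer
   type, and whose frequency-adjusted cost is 8, ends up costed as
   (8 * 17) / 10 == 13.  */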
void
ix86_vector_costs::ix86_vect_estimate_reg_pressure ()
{
  unsigned gpr_spill_cost = COSTS_N_INSNS (ix86_cost->int_store[2]) / 2;
  unsigned sse_spill_cost = COSTS_N_INSNS (ix86_cost->sse_store[0]) / 2;

  /* Is there a better way to get the number of available FP registers?
     For now use SSE_REGS.  */
  unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 8;
  for (unsigned i = 0; i != 3; i++)
    {
      if (m_num_gpr_needed[i] > target_avail_regs)
        m_costs[i] += gpr_spill_cost * (m_num_gpr_needed[i]
                                        - target_avail_regs);
      /* Only measure SSE register pressure.  */
      if (TARGET_SSE && (m_num_sse_needed[i] > target_avail_sse))
        m_costs[i] += sse_spill_cost * (m_num_sse_needed[i]
                                        - target_avail_sse);
    }
}
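/* For example (assuming -m64 with AVX512F enabled, so target_avail_sse
   is 32): if m_num_sse_needed[vect_body] is 40, the vect_body cost
   grows by sse_spill_cost * 8, modelling spills of the 8 excess live
   vector values.  */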
void
ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
{
  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
  if (loop_vinfo && !m_costing_for_scalar)
    {
      /* We are currently not asking the vectorizer to compare costs
         between different vector mode sizes.  When using predication
         that will end up always choosing the preferred mode size even
         if there's a smaller mode covering all lanes.  Test for this
         situation and artificially reject the larger mode attempt.
         ???  We currently lack masked ops for sub-SSE sized modes,
         so we could restrict this rejection to AVX and AVX512 modes
         but error on the safe side for now.  */
      if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
          && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
          && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
          && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())
              > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo))))
        m_costs[vect_body] = INT_MAX;
    }

  ix86_vect_estimate_reg_pressure ();

  vector_costs::finish_cost (scalar_costs);
}
/* Validate target specific memory model bits in VAL.  */

static unsigned HOST_WIDE_INT
ix86_memmodel_check (unsigned HOST_WIDE_INT val)
{
  enum memmodel model = memmodel_from_int (val);
  bool strong;

  if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
                                      |MEMMODEL_MASK)
      || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
    {
      warning (OPT_Winvalid_memory_model,
               "unknown architecture specific memory model");
      return MEMMODEL_SEQ_CST;
    }
  strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
  if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
    {
      warning (OPT_Winvalid_memory_model,
               "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
               "memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
    }
  if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
    {
      warning (OPT_Winvalid_memory_model,
               "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
               "memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
    }
  return val;
}
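/* The HLE bits validated above reach this hook from user code such as
   (see the GCC manual on x86-specific memory model extensions):

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       _mm_pause ();
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Combining HLE_ACQUIRE with a weaker-than-ACQUIRE model is diagnosed
   by the warnings above.  */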
/* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
   CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
   CLONEI->simdlen.  Return 0 if SIMD clones shouldn't be emitted,
   or number of vecsize_mangle variants that should be emitted.  */

static int
ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
                                             struct cgraph_simd_clone *clonei,
                                             tree base_type, int num,
                                             bool explicit_p)
{
  int ret = 1;

  if (clonei->simdlen
      && (clonei->simdlen < 2
          || clonei->simdlen > 1024
          || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
    {
      if (explicit_p)
        warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
                    "unsupported simdlen %wd", clonei->simdlen.to_constant ());
      return 0;
    }

  tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
  if (TREE_CODE (ret_type) != VOID_TYPE)
    switch (TYPE_MODE (ret_type))
      {
      case E_QImode:
      case E_HImode:
      case E_SImode:
      case E_DImode:
      case E_SFmode:
      case E_DFmode:
      /* case E_SCmode: */
      /* case E_DCmode: */
        break;
      default:
        if (!AGGREGATE_TYPE_P (ret_type))
          {
            if (explicit_p)
              warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
                          "unsupported return type %qT for simd", ret_type);
            return 0;
          }
      }

  tree t;
  int i;
  tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
  bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);

  for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
       t && t != void_list_node; t = TREE_CHAIN (t), i++)
    {
      tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
      switch (TYPE_MODE (arg_type))
        {
        case E_QImode:
        case E_HImode:
        case E_SImode:
        case E_DImode:
        case E_SFmode:
        case E_DFmode:
        /* case E_SCmode: */
        /* case E_DCmode: */
          break;
        default:
          if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
            break;
          if (!AGGREGATE_TYPE_P (arg_type))
            {
              if (explicit_p)
                warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
                            "unsupported argument type %qT for simd",
                            arg_type);
              return 0;
            }
        }
    }

  if (!TREE_PUBLIC (node->decl) || !explicit_p)
    {
      /* If the function isn't exported, we can pick up just one ISA
         for the clones.  */
      if (TARGET_AVX512F && TARGET_EVEX512)
        clonei->vecsize_mangle = 'e';
      else if (TARGET_AVX2)
        clonei->vecsize_mangle = 'd';
      else if (TARGET_AVX)
        clonei->vecsize_mangle = 'c';
      else
        clonei->vecsize_mangle = 'b';
      ret = 1;
    }
  else
    {
      clonei->vecsize_mangle = "bcde"[num];
      ret = 4;
    }
  clonei->mask_mode = VOIDmode;
  switch (clonei->vecsize_mangle)
    {
    case 'b':
      clonei->vecsize_int = 128;
      clonei->vecsize_float = 128;
      break;
    case 'c':
      clonei->vecsize_int = 128;
      clonei->vecsize_float = 256;
      break;
    case 'd':
      clonei->vecsize_int = 256;
      clonei->vecsize_float = 256;
      break;
    case 'e':
      clonei->vecsize_int = 512;
      clonei->vecsize_float = 512;
      if (TYPE_MODE (base_type) == QImode)
        clonei->mask_mode = DImode;
      else
        clonei->mask_mode = SImode;
      break;
    }
  if (clonei->simdlen == 0)
    {
      if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
        clonei->simdlen = clonei->vecsize_int;
      else
        clonei->simdlen = clonei->vecsize_float;
      clonei->simdlen = clonei->simdlen
                        / GET_MODE_BITSIZE (TYPE_MODE (base_type));
    }
  else if (clonei->simdlen > 16)
    {
      /* For compatibility with ICC, use the same upper bounds
         for simdlen.  In particular, for CTYPE below, use the return type,
         unless the function returns void, in that case use the characteristic
         type.  If it is possible for given SIMDLEN to pass CTYPE value
         in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
         for 64-bit code), accept that SIMDLEN, otherwise warn and don't
         emit corresponding clone.  */
      tree ctype = ret_type;
      if (VOID_TYPE_P (ret_type))
        ctype = base_type;
      int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
      if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
        cnt /= clonei->vecsize_int;
      else
        cnt /= clonei->vecsize_float;
      if (cnt > (TARGET_64BIT ? 16 : 8))
        {
          if (explicit_p)
            warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
                        "unsupported simdlen %wd",
                        clonei->simdlen.to_constant ());
          return 0;
        }
    }
  return ret;
}
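/* Worked example of the default simdlen computation above (a sketch):
   for a 'b' (128-bit) clone of a function whose characteristic type is
   float, vecsize_float is 128 and GET_MODE_BITSIZE (SFmode) is 32, so
   simdlen becomes 128 / 32 == 4; the 'e' (512-bit) clone of the same
   function gets simdlen 16.  */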
/* If SIMD clone NODE can't be used in a vectorized loop
   in current function, return -1, otherwise return a badness of using it
   (0 if it is most desirable from vecsize_mangle point of view, 1
   slightly less desirable, etc.).  */

static int
ix86_simd_clone_usable (struct cgraph_node *node)
{
  switch (node->simdclone->vecsize_mangle)
    {
    case 'b':
      if (!TARGET_SSE2)
        return -1;
      if (!TARGET_AVX)
        return 0;
      return (TARGET_AVX512F && TARGET_EVEX512) ? 3 : TARGET_AVX2 ? 2 : 1;
    case 'c':
      if (!TARGET_AVX)
        return -1;
      return (TARGET_AVX512F && TARGET_EVEX512) ? 2 : TARGET_AVX2 ? 1 : 0;
    case 'd':
      if (!TARGET_AVX2)
        return -1;
      return (TARGET_AVX512F && TARGET_EVEX512) ? 1 : 0;
    case 'e':
      if (!TARGET_AVX512F || !TARGET_EVEX512)
        return -1;
      return 0;
    default:
      gcc_unreachable ();
    }
}
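/* For example, when compiling with -mavx2 (but without AVX512), a 'd'
   (AVX2) clone gets badness 0, 'c' (AVX) gets 1, 'b' (SSE) gets 2, and
   the 'e' (AVX512) clone is unusable (-1).  */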
/* This function adjusts the unroll factor based on
   the hardware capabilities.  For example, bdver3 has
   a loop buffer which makes unrolling of smaller
   loops less important.  This function decides the
   unroll factor using the number of memory references
   (value 32 is used) as a heuristic.  */

static unsigned
ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
{
  basic_block *bbs;
  rtx_insn *insn;
  unsigned i;
  unsigned mem_count = 0;

  /* Unroll small size loops when unroll factor is not explicitly
     specified.  */
  if (ix86_unroll_only_small_loops && !loop->unroll)
    {
      if (loop->ninsns <= ix86_cost->small_unroll_ninsns)
        return MIN (nunroll, ix86_cost->small_unroll_factor);
      else
        return 1;
    }

  if (!TARGET_ADJUST_UNROLL)
    return nunroll;

  /* Count the number of memory references within the loop body.
     This value determines the unrolling factor for bdver3 and bdver4
     architectures.  */
  subrtx_iterator::array_type array;
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    FOR_BB_INSNS (bbs[i], insn)
      if (NONDEBUG_INSN_P (insn))
        FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
          if (const_rtx x = *iter)
            if (MEM_P (x))
              {
                machine_mode mode = GET_MODE (x);
                unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
                if (n_words > 4)
                  mem_count += 2;
                else
                  mem_count += 1;
              }
  free (bbs);

  if (mem_count && mem_count <= 32)
    return MIN (nunroll, 32 / mem_count);

  return nunroll;
}
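/* For example, on a TARGET_ADJUST_UNROLL tuning (bdver3/bdver4), a
   loop body containing 4 counted memory references is limited to an
   unroll factor of MIN (nunroll, 32 / 4), i.e. at most 8.  */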
/* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P.  */

static bool
ix86_float_exceptions_rounding_supported_p (void)
{
  /* For x87 floating point with standard excess precision handling,
     there is no adddf3 pattern (since x87 floating point only has
     XFmode operations) so the default hook implementation gets this
     wrong.  */
  return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
}
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */

static void
ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
    return;
  tree exceptions_var = create_tmp_var_raw (integer_type_node);
  if (TARGET_80387)
    {
      tree fenv_index_type = build_index_type (size_int (6));
      tree fenv_type = build_array_type (unsigned_type_node,
                                         fenv_index_type);
      tree fenv_var = create_tmp_var_raw (fenv_type);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_ptr = build_pointer_type (fenv_type);
      tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
      fenv_addr = fold_convert (ptr_type_node, fenv_addr);
      tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
      tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
      tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
      tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
      tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
      tree hold_fnclex = build_call_expr (fnclex, 0);
      fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
                         NULL_TREE, NULL_TREE);
      *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
                      hold_fnclex);
      *clear = build_call_expr (fnclex, 0);
      tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
      tree fnstsw_call = build_call_expr (fnstsw, 0);
      tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
                            fnstsw_call, NULL_TREE, NULL_TREE);
      tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
      tree update_mod = build4 (TARGET_EXPR, integer_type_node,
                                exceptions_var, exceptions_x87,
                                NULL_TREE, NULL_TREE);
      *update = build2 (COMPOUND_EXPR, integer_type_node,
                        sw_mod, update_mod);
      tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
      *update = build2 (COMPOUND_EXPR, void_type_node, *update,
                        update_fldenv);
    }
  if (TARGET_SSE && TARGET_SSE_MATH)
    {
      tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
      tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
      tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
      tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
      tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
      tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
                                      mxcsr_orig_var, stmxcsr_hold_call,
                                      NULL_TREE, NULL_TREE);
      tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
                                  mxcsr_orig_var,
                                  build_int_cst (unsigned_type_node,
                                                 0x1f80));
      hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
                             build_int_cst (unsigned_type_node,
                                            0xffffffc0));
      tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
                                     mxcsr_mod_var, hold_mod_val,
                                     NULL_TREE, NULL_TREE);
      tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
      tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
                              hold_assign_orig, hold_assign_mod);
      hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
                         ldmxcsr_hold_call);
      if (*hold)
        *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
      else
        *hold = hold_all;
      tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
      if (*clear)
        *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
                         ldmxcsr_clear_call);
      else
        *clear = ldmxcsr_clear_call;
      tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
      tree exceptions_sse = fold_convert (integer_type_node,
                                          stxmcsr_update_call);
      if (*update)
        {
          tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
                                        exceptions_var, exceptions_sse);
          tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
                                           exceptions_var, exceptions_mod);
          *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
                            exceptions_assign);
        }
      else
        *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
                          exceptions_sse, NULL_TREE, NULL_TREE);
      tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1,
                                                  mxcsr_orig_var);
      *update = build2 (COMPOUND_EXPR, void_type_node, *update,
                        ldmxcsr_update_call);
    }
  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
                                                    1, exceptions_var);
  *update = build2 (COMPOUND_EXPR, void_type_node, *update,
                    atomic_feraiseexcept_call);
}
#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* For i386, common symbol is local only for non-PIE binaries.  For
   x86-64, common symbol is local only for non-PIE binaries or linker
   supports copy reloc in PIE binaries.  */

static bool
ix86_binds_local_p (const_tree exp)
{
  bool direct_extern_access
    = (ix86_direct_extern_access
       && !(VAR_OR_FUNCTION_DECL_P (exp)
            && lookup_attribute ("nodirect_extern_access",
                                 DECL_ATTRIBUTES (exp))));
  if (!direct_extern_access)
    ix86_has_no_direct_extern_access = true;
  return default_binds_local_p_3 (exp, flag_shlib != 0, true,
                                  direct_extern_access,
                                  (direct_extern_access
                                   && (!flag_pic
                                       || (TARGET_64BIT
                                           && HAVE_LD_PIE_COPYRELOC != 0))));
}
#endif
/* If flag_pic or ix86_direct_extern_access is false, then neither
   local nor global relocs should be placed in readonly memory.  */

static int
ix86_reloc_rw_mask (void)
{
  return (flag_pic || !ix86_direct_extern_access) ? 3 : 0;
}
/* Return true iff ADDR can be used as a symbolic base address.  */

static bool
symbolic_base_address_p (rtx addr)
{
  if (GET_CODE (addr) == SYMBOL_REF)
    return true;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_GOTOFF)
    return true;

  return false;
}

/* Return true iff ADDR can be used as a base address.  */

static bool
base_address_p (rtx addr)
{
  if (REG_P (addr))
    return true;

  if (symbolic_base_address_p (addr))
    return true;

  return false;
}

/* If MEM is in the form of [(base+symbase)+offset], extract the three
   parts of address and set to BASE, SYMBASE and OFFSET, otherwise
   return false.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *symbase, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (base_address_p (addr))
    {
      *base = addr;
      *symbase = const0_rtx;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && base_address_p (XEXP (addr, 0)))
    {
      rtx addend = XEXP (addr, 1);

      if (GET_CODE (addend) == CONST)
        addend = XEXP (addend, 0);

      if (CONST_INT_P (addend))
        {
          *base = XEXP (addr, 0);
          *symbase = const0_rtx;
          *offset = addend;
          return true;
        }

      /* Also accept REG + symbolic ref, with or without a CONST_INT
         offset.  */
      if (REG_P (XEXP (addr, 0)))
        {
          if (symbolic_base_address_p (addend))
            {
              *base = XEXP (addr, 0);
              *symbase = addend;
              *offset = const0_rtx;
              return true;
            }

          if (GET_CODE (addend) == PLUS
              && symbolic_base_address_p (XEXP (addend, 0))
              && CONST_INT_P (XEXP (addend, 1)))
            {
              *base = XEXP (addr, 0);
              *symbase = XEXP (addend, 0);
              *offset = XEXP (addend, 1);
              return true;
            }
        }
    }

  return false;
}
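/* For example, the address

     (plus:DI (reg:DI ax)
              (const:DI (plus:DI (symbol_ref:DI ("sym")) (const_int 8))))

   decomposes into *base = (reg:DI ax), *symbase = the SYMBOL_REF and
   *offset = (const_int 8).  */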
/* Given OPERANDS of consecutive load/store, check if we can merge
   them into move multiple.  LOAD is true if they are load instructions.
   MODE is the mode of memory operands.  */

bool
ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
                                    machine_mode mode)
{
  HOST_WIDE_INT offval_1, offval_2, msize;
  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2,
      symbase_1, symbase_2, offset_1, offset_2;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      reg_1 = operands[0];
      reg_2 = operands[2];
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      reg_1 = operands[1];
      reg_2 = operands[3];
    }

  gcc_assert (REG_P (reg_1) && REG_P (reg_2));

  if (REGNO (reg_1) != REGNO (reg_2))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  if (!extract_base_offset_in_addr (mem_1, &base_1, &symbase_1, &offset_1))
    return false;
  if (!extract_base_offset_in_addr (mem_2, &base_2, &symbase_2, &offset_2))
    return false;

  /* Check if the bases are the same.  */
  if (!rtx_equal_p (base_1, base_2) || !rtx_equal_p (symbase_1, symbase_2))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  msize = GET_MODE_SIZE (mode);
  /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address.  */
  if (offval_1 + msize != offval_2)
    return false;

  return true;
}
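/* For example, two DImode memory operands at [base] and [base+8] (with
   equal base and symbolic parts) pass the adjacency check above, since
   offval_1 + GET_MODE_SIZE (DImode) == offval_2.  */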
/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
                        optimization_type opt_type)
{
  switch (op)
    {
    case asin_optab:
    case acos_optab:
    case log1p_optab:
    case exp_optab:
    case exp10_optab:
    case exp2_optab:
    case expm1_optab:
    case ldexp_optab:
    case scalb_optab:
    case round_optab:
    case lround_optab:
      return opt_type == OPTIMIZE_FOR_SPEED;

    case rint_optab:
      if (SSE_FLOAT_MODE_P (mode1)
          && TARGET_SSE_MATH
          && !flag_trapping_math
          && !TARGET_SSE4_1
          && mode1 != HFmode)
        return opt_type == OPTIMIZE_FOR_SPEED;
      return true;

    case floor_optab:
    case ceil_optab:
    case btrunc_optab:
      if (((SSE_FLOAT_MODE_P (mode1)
            && TARGET_SSE_MATH
            && !TARGET_SSE4_1)
           || mode1 == HFmode)
          && !flag_trapping_math)
        return opt_type == OPTIMIZE_FOR_SPEED;
      return true;

    case rsqrt_optab:
      return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);

    default:
      return true;
    }
}
/* Address space support.

   This is not "far pointers" in the 16-bit sense, but an easy way
   to use %fs and %gs segment prefixes.  Therefore:

    (a) All address spaces have the same modes,
    (b) All address spaces have the same address forms,
    (c) While %fs and %gs are technically subsets of the generic
        address space, they are probably not subsets of each other.
    (d) Since we have no access to the segment base register values
        without resorting to a system call, we cannot convert a
        non-default address space to a default address space.
        Therefore we do not claim %fs or %gs are subsets of generic.

   Therefore we can (mostly) use the default hooks.  */

/* All use of segmentation is assumed to make address 0 valid.  */

static bool
ix86_addr_space_zero_address_valid (addr_space_t as)
{
  return as != ADDR_SPACE_GENERIC;
}
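/* Consequently, dereferencing address 0 is meaningful in the %fs/%gs
   spaces; e.g. with the named address space extension:

     int __seg_gs *p = 0;   /- %gs:0 is a valid reference -/

   whereas 0 in the generic space remains a null pointer.  */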
static void
ix86_init_libfuncs (void)
{
  if (TARGET_64BIT)
    {
      set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
      set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
    }
  else
    {
      set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
      set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
    }

#if TARGET_MACHO
  darwin_rename_builtins ();
#endif
}
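/* With these libfuncs registered, a quotient/remainder pair such as

     __int128 q = a / b, r = a % b;

   can be combined by the middle end into a single __divmodti4 call on
   x86-64 (similarly __divmoddi4 for 64-bit values on ia32), instead of
   separate division and modulus libcalls.  */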
/* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
   FPU, assume that the fpcw is set to extended precision; when using
   only SSE, rounding is correct; when using both SSE and the FPU,
   the rounding precision is indeterminate, since either may be chosen
   apparently at random.  */

static enum flt_eval_method
ix86_get_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
      case EXCESS_PRECISION_TYPE_FAST:
        /* The fastest type to promote to will always be the native type,
           whether that occurs with implicit excess precision or
           otherwise.  */
        return TARGET_AVX512FP16
               ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
               : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
      case EXCESS_PRECISION_TYPE_STANDARD:
      case EXCESS_PRECISION_TYPE_IMPLICIT:
        /* Otherwise, the excess precision we want when we are
           in a standards compliant mode, and the implicit precision we
           provide would be identical were it not for the unpredictable
           cases.  */
        if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
          return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
        else if (!TARGET_80387)
          return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
        else if (!TARGET_MIX_SSE_I387)
          {
            if (!(TARGET_SSE && TARGET_SSE_MATH))
              return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
            else if (TARGET_SSE2)
              return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
          }

        /* If we are in standards compliant mode, but we know we will
           calculate in unpredictable precision, return
           FLT_EVAL_METHOD_FLOAT.  There is no reason to introduce explicit
           excess precision if the target can't guarantee it will honor
           it.  */
        return (type == EXCESS_PRECISION_TYPE_STANDARD
                ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
                : FLT_EVAL_METHOD_UNPREDICTABLE);
      case EXCESS_PRECISION_TYPE_FLOAT16:
        if (TARGET_80387
            && !(TARGET_SSE_MATH && TARGET_SSE))
          error ("%<-fexcess-precision=16%> is not compatible with "
                 "%<-mfpmath=387%>");
        return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      default:
        gcc_unreachable ();
    }

  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
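/* For example, compiling in a standards-compliant mode with x87 math
   only (no -mfpmath=sse) yields FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE,
   i.e. FLT_EVAL_METHOD == 2: float and double arithmetic is carried
   out in the 80-bit x87 format.  */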
/* Return true if _BitInt(N) is supported and fill its details into *INFO.  */
bool
ix86_bitint_type_info (int n, struct bitint_info *info)
{
  if (!TARGET_64BIT)
    return false;
  if (n <= 8)
    info->limb_mode = QImode;
  else if (n <= 16)
    info->limb_mode = HImode;
  else if (n <= 32)
    info->limb_mode = SImode;
  else
    info->limb_mode = DImode;
  info->abi_limb_mode = info->limb_mode;
  info->big_endian = false;
  info->extended = false;
  return true;
}
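/* For example, _BitInt(256) on x86-64 is represented as four DImode
   limbs, while _BitInt of 32 or fewer bits picks the smallest fitting
   integer mode; ia32 does not support _BitInt at all (the
   !TARGET_64BIT early return above).  */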
/* Implement PUSH_ROUNDING.  On 386, we have pushw instruction that
   decrements by exactly 2 no matter what the position was, there is no pushb.

   But as CIE data alignment factor on this arch is -4 for 32bit targets
   and -8 for 64bit targets, we need to make sure all stack pointer adjustments
   are in multiple of 4 for 32bit targets and 8 for 64bit targets.  */

poly_int64
ix86_push_rounding (poly_int64 bytes)
{
  return ROUND_UP (bytes, UNITS_PER_WORD);
}
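/* For example, pushing a single QImode byte still adjusts the stack by
   a full word: ROUND_UP (1, 4) == 4 on ia32 and ROUND_UP (1, 8) == 8
   on x86-64, keeping CFA offsets a multiple of the CIE data alignment
   factor.  */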
/* Use 8 bits of metadata starting from bit48 for LAM_U48,
   6 bits of metadata starting from bit57 for LAM_U57.  */
#define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48		\
			   ? 48					\
			   : (ix86_lam_type == lam_u57 ? 57 : 0))
#define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48		\
			      ? 8				\
			      : (ix86_lam_type == lam_u57 ? 6 : 0))
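/* Under LAM_U57 a 64-bit pointer is thus laid out as

     bit 63      : left alone
     bits 62..57 : 6-bit tag (IX86_HWASAN_TAG_SIZE at IX86_HWASAN_SHIFT)
     bits 56..0  : address bits

   while LAM_U48 keeps an 8-bit tag at bits 55..48.  */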
/* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES.  */
bool
ix86_memtag_can_tag_addresses ()
{
  return ix86_lam_type != lam_none && TARGET_LP64;
}

/* Implement TARGET_MEMTAG_TAG_SIZE.  */
unsigned char
ix86_memtag_tag_size ()
{
  return IX86_HWASAN_TAG_SIZE;
}
/* Implement TARGET_MEMTAG_SET_TAG.  */
rtx
ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target)
{
  /* default_memtag_insert_random_tag may
     generate a tag with value more than 6 bits.  */
  if (ix86_lam_type == lam_u57)
    {
      unsigned HOST_WIDE_INT and_imm
	= (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;

      emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm)));
    }
  tag = expand_simple_binop (Pmode, ASHIFT, tag,
			     GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX,
			     /* unsignedp = */1, OPTAB_WIDEN);
  rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target,
				 /* unsignedp = */1, OPTAB_DIRECT);
  return ret;
}
/* Implement TARGET_MEMTAG_EXTRACT_TAG.  */
rtx
ix86_memtag_extract_tag (rtx tagged_pointer, rtx target)
{
  rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer,
				 GEN_INT (IX86_HWASAN_SHIFT), target,
				 /* unsignedp = */0,
				 OPTAB_DIRECT);
  rtx ret = gen_reg_rtx (QImode);
  /* Mask off bit63 when LAM_U57.  */
  if (ix86_lam_type == lam_u57)
    {
      unsigned HOST_WIDE_INT and_imm
	= (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
      emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag),
			     gen_int_mode (and_imm, QImode)));
    }
  else
    emit_move_insn (ret, gen_lowpart (QImode, tag));
  return ret;
}
/* The default implementation of TARGET_MEMTAG_UNTAGGED_POINTER.  */
rtx
ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
{
  /* Leave bit63 alone.  */
  rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT)
				+ (HOST_WIDE_INT_1U << 63) - 1),
			       Pmode);
  rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer,
					   tag_mask, target, true,
					   OPTAB_DIRECT);
  gcc_assert (untagged_base);
  return untagged_base;
}
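/* The mask computed above is (1 << 57) + (1 << 63) - 1 for LAM_U57,
   i.e. 0x81ffffffffffffff: bit 63 and all address bits set, the tag
   bits 62..57 clear, so the AND removes only the tag.  */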
/* Implement TARGET_MEMTAG_ADD_TAG.  */
rtx
ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset)
{
  rtx base_tag = gen_reg_rtx (QImode);
  rtx base_addr = gen_reg_rtx (Pmode);
  rtx tagged_addr = gen_reg_rtx (Pmode);
  rtx new_tag = gen_reg_rtx (QImode);
  unsigned HOST_WIDE_INT and_imm
    = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1;

  /* When there's "overflow" in tag adding,
     we need to mask the most significant bit off.  */
  emit_move_insn (base_tag, ix86_memtag_extract_tag (base, NULL_RTX));
  emit_move_insn (base_addr,
		  ix86_memtag_untagged_pointer (base, NULL_RTX));
  emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode)));
  emit_move_insn (new_tag, base_tag);
  emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode)));
  emit_move_insn (tagged_addr,
		  ix86_memtag_set_tag (base_addr, new_tag, NULL_RTX));
  return plus_constant (Pmode, tagged_addr, offset);
}
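/* For example, with LAM_U57 and tag_offset 3 applied to a base tag of
   0x3e, base_tag becomes 0x41; the masking here together with the one
   in ix86_memtag_set_tag (which ANDs with 0x3f) wraps it back to 0x01,
   the "overflow" case mentioned above.  */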
/* Target-specific selftests.  */

#if CHECKING_P

namespace selftest {

/* Verify that hard regs are dumped as expected (in compact mode).  */

static void
ix86_test_dumping_hard_regs ()
{
  ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
  ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
}

/* Test dumping an insn with repeated references to the same SCRATCH,
   to verify the rtx_reuse code.  */

static void
ix86_test_dumping_memory_blockage ()
{
  set_new_first_and_last_insn (NULL, NULL);

  rtx pat = gen_memory_blockage ();
  rtx_reuse_manager r;
  r.preprocess (pat);

  /* Verify that the repeated references to the SCRATCH show use of
     reuse IDs.  The first should be prefixed with a reuse ID,
     and the second should be dumped as a "reuse_rtx" of that ID.
     The expected string assumes Pmode == DImode.  */
  if (Pmode == DImode)
    ASSERT_RTL_DUMP_EQ_WITH_REUSE
      ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
       "        (unspec:BLK [\n"
       "                (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
       "            ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
}

/* Verify loading an RTL dump; specifically a dump of copying
   a param on x86_64 from a hard reg into the frame.
   This test is target-specific since the dump contains target-specific
   directives.  */

static void
ix86_test_loading_dump_fragment_1 ()
{
  rtl_dump_test t (SELFTEST_LOCATION,
		   locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));

  rtx_insn *insn = get_insn_by_uid (1);

  /* The block structure and indentation here is purely for
     readability; it mirrors the structure of the rtx.  */
  tree mem_expr;

  {
    rtx pat = PATTERN (insn);
    ASSERT_EQ (SET, GET_CODE (pat));
    {
      rtx dest = SET_DEST (pat);
      ASSERT_EQ (MEM, GET_CODE (dest));
      /* Verify the "/c" was parsed.  */
      ASSERT_TRUE (RTX_FLAG (dest, call));
      ASSERT_EQ (SImode, GET_MODE (dest));
      {
	rtx addr = XEXP (dest, 0);
	ASSERT_EQ (PLUS, GET_CODE (addr));
	ASSERT_EQ (DImode, GET_MODE (addr));
	{
	  rtx lhs = XEXP (addr, 0);
	  /* Verify that the "frame" REG was consolidated.  */
	  ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
	}
	{
	  rtx rhs = XEXP (addr, 1);
	  ASSERT_EQ (CONST_INT, GET_CODE (rhs));
	  ASSERT_EQ (-4, INTVAL (rhs));
	}
      }
      /* Verify the "[1 i+0 S4 A32]" was parsed.  */
      ASSERT_EQ (1, MEM_ALIAS_SET (dest));
      /* "i" should have been handled by synthesizing a global int
	 variable named "i".  */
      mem_expr = MEM_EXPR (dest);
      ASSERT_NE (mem_expr, NULL);
      ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
      ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
      ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
      ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
      /* "+0".  */
      ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
      ASSERT_EQ (0, MEM_OFFSET (dest));
      /* "S4".  */
      ASSERT_EQ (4, MEM_SIZE (dest));
      /* "A32".  */
      ASSERT_EQ (32, MEM_ALIGN (dest));
    }
    {
      rtx src = SET_SRC (pat);
      ASSERT_EQ (REG, GET_CODE (src));
      ASSERT_EQ (SImode, GET_MODE (src));
      ASSERT_EQ (5, REGNO (src));
      tree reg_expr = REG_EXPR (src);
      /* "i" here should point to the same var as for the MEM_EXPR.  */
      ASSERT_EQ (reg_expr, mem_expr);
    }
  }
}

/* Verify that the RTL loader copes with a call_insn dump.
   This test is target-specific since the dump contains a target-specific
   hard reg name.  */

static void
ix86_test_loading_call_insn ()
{
  /* The test dump includes register "xmm0", which requires TARGET_SSE
     to exist.  */
  if (!TARGET_SSE)
    return;

  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));

  rtx_insn *insn = get_insns ();
  ASSERT_EQ (CALL_INSN, GET_CODE (insn));

  /* "/j".  */
  ASSERT_TRUE (RTX_FLAG (insn, jump));

  rtx pat = PATTERN (insn);
  ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));

  /* Verify REG_NOTES.  */
  {
    /* "(expr_list:REG_CALL_DECL".   */
    ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
    rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
    ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));

    /* "(expr_list:REG_EH_REGION (const_int 0 [0])".  */
    rtx_expr_list *note1 = note0->next ();
    ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));

    ASSERT_EQ (NULL, note1->next ());
  }

  /* Verify CALL_INSN_FUNCTION_USAGE.  */
  {
    /* "(expr_list:DF (use (reg:DF 21 xmm0))".  */
    rtx_expr_list *usage
      = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
    ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
    ASSERT_EQ (DFmode, GET_MODE (usage));
    ASSERT_EQ (USE, GET_CODE (usage->element ()));
    ASSERT_EQ (NULL, usage->next ());
  }
}

/* Verify that the RTL loader copes with a dump from print_rtx_function.
   This test is target-specific since the dump contains target-specific
   hard reg names.  */

static void
ix86_test_loading_full_dump ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));

  ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  rtx_insn *insn_1 = get_insn_by_uid (1);
  ASSERT_EQ (NOTE, GET_CODE (insn_1));

  rtx_insn *insn_7 = get_insn_by_uid (7);
  ASSERT_EQ (INSN, GET_CODE (insn_7));
  ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));

  rtx_insn *insn_15 = get_insn_by_uid (15);
  ASSERT_EQ (INSN, GET_CODE (insn_15));
  ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));

  /* Verify crtl->return_rtx.  */
  ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
  ASSERT_EQ (0, REGNO (crtl->return_rtx));
  ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
}

/* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
   In particular, verify that it correctly loads the 2nd operand.
   This test is target-specific since these are machine-specific
   operands (and enums).  */

static void
ix86_test_loading_unspec ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));

  ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  ASSERT_TRUE (cfun);

  /* Test of an UNSPEC.  */
  rtx_insn *insn = get_insns ();
  ASSERT_EQ (INSN, GET_CODE (insn));
  rtx set = single_set (insn);
  ASSERT_NE (NULL, set);
  rtx dst = SET_DEST (set);
  ASSERT_EQ (MEM, GET_CODE (dst));
  rtx src = SET_SRC (set);
  ASSERT_EQ (UNSPEC, GET_CODE (src));
  ASSERT_EQ (BLKmode, GET_MODE (src));
  ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));

  rtx v0 = XVECEXP (src, 0, 0);

  /* Verify that the two uses of the first SCRATCH have pointer
     equality.  */
  rtx scratch_a = XEXP (dst, 0);
  ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));

  rtx scratch_b = XEXP (v0, 0);
  ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));

  ASSERT_EQ (scratch_a, scratch_b);

  /* Verify that the two mems are thus treated as equal.  */
  ASSERT_TRUE (rtx_equal_p (dst, v0));

  /* Verify that the insn is recognized.  */
  ASSERT_NE (-1, recog_memoized (insn));

  /* Test of an UNSPEC_VOLATILE, which has its own enum values.  */
  insn = NEXT_INSN (insn);
  ASSERT_EQ (INSN, GET_CODE (insn));

  set = single_set (insn);
  ASSERT_NE (NULL, set);

  src = SET_SRC (set);
  ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
  ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
}

/* Run all target-specific selftests.  */

static void
ix86_run_selftests (void)
{
  ix86_test_dumping_hard_regs ();
  ix86_test_dumping_memory_blockage ();

  /* Various tests of loading RTL dumps, here because they contain
     ix86-isms (e.g. names of hard regs).  */
  ix86_test_loading_dump_fragment_1 ();
  ix86_test_loading_call_insn ();
  ix86_test_loading_full_dump ();
  ix86_test_loading_unspec ();
}

} // namespace selftest

#endif /* CHECKING_P */
static const scoped_attribute_specs *const ix86_attribute_table[] =
{
  &ix86_gnu_attribute_table
};

/* Initialize the GCC target structure.  */
26182 #undef TARGET_RETURN_IN_MEMORY
26183 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
26185 #undef TARGET_LEGITIMIZE_ADDRESS
26186 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
26188 #undef TARGET_ATTRIBUTE_TABLE
26189 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
26190 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
26191 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
26197 #undef TARGET_INVALID_CONVERSION
26198 #define TARGET_INVALID_CONVERSION ix86_invalid_conversion
26200 #undef TARGET_INVALID_UNARY_OP
26201 #define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
26203 #undef TARGET_INVALID_BINARY_OP
26204 #define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
26206 #undef TARGET_COMP_TYPE_ATTRIBUTES
26207 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
26209 #undef TARGET_INIT_BUILTINS
26210 #define TARGET_INIT_BUILTINS ix86_init_builtins
26211 #undef TARGET_BUILTIN_DECL
26212 #define TARGET_BUILTIN_DECL ix86_builtin_decl
26213 #undef TARGET_EXPAND_BUILTIN
26214 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
26216 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
26217 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
26218 ix86_builtin_vectorized_function
26220 #undef TARGET_VECTORIZE_BUILTIN_GATHER
26221 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
26223 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
26224 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
26226 #undef TARGET_BUILTIN_RECIPROCAL
26227 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
26229 #undef TARGET_ASM_FUNCTION_EPILOGUE
26230 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
26232 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
26233 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
26234 ix86_print_patchable_function_entry
26236 #undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif
26243 #undef TARGET_ASM_OPEN_PAREN
26244 #define TARGET_ASM_OPEN_PAREN ""
26245 #undef TARGET_ASM_CLOSE_PAREN
26246 #define TARGET_ASM_CLOSE_PAREN ""
26248 #undef TARGET_ASM_BYTE_OP
26249 #define TARGET_ASM_BYTE_OP ASM_BYTE
26251 #undef TARGET_ASM_ALIGNED_HI_OP
26252 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
26253 #undef TARGET_ASM_ALIGNED_SI_OP
26254 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
26256 #undef TARGET_ASM_ALIGNED_DI_OP
26257 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
26260 #undef TARGET_PROFILE_BEFORE_PROLOGUE
26261 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
26263 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
26264 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
26266 #undef TARGET_ASM_UNALIGNED_HI_OP
26267 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
26268 #undef TARGET_ASM_UNALIGNED_SI_OP
26269 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
26270 #undef TARGET_ASM_UNALIGNED_DI_OP
26271 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
26273 #undef TARGET_PRINT_OPERAND
26274 #define TARGET_PRINT_OPERAND ix86_print_operand
26275 #undef TARGET_PRINT_OPERAND_ADDRESS
26276 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
26277 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
26278 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
26279 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
26280 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
26282 #undef TARGET_SCHED_INIT_GLOBAL
26283 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
26284 #undef TARGET_SCHED_ADJUST_COST
26285 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
26286 #undef TARGET_SCHED_ISSUE_RATE
26287 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
26288 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
26289 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
26290 ia32_multipass_dfa_lookahead
26291 #undef TARGET_SCHED_MACRO_FUSION_P
26292 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
26293 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
26294 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
26296 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
26297 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
26299 #undef TARGET_MEMMODEL_CHECK
26300 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
26302 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
26303 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
26306 #undef TARGET_HAVE_TLS
26307 #define TARGET_HAVE_TLS true
26309 #undef TARGET_CANNOT_FORCE_CONST_MEM
26310 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
26311 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
26312 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
26314 #undef TARGET_DELEGITIMIZE_ADDRESS
26315 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
26317 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
26318 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
26320 #undef TARGET_MS_BITFIELD_LAYOUT_P
26321 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#else
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P ix86_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif
26335 #undef TARGET_ASM_OUTPUT_MI_THUNK
26336 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
26337 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
26338 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
26340 #undef TARGET_ASM_FILE_START
26341 #define TARGET_ASM_FILE_START x86_file_start
26343 #undef TARGET_OPTION_OVERRIDE
26344 #define TARGET_OPTION_OVERRIDE ix86_option_override
26346 #undef TARGET_REGISTER_MOVE_COST
26347 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
26348 #undef TARGET_MEMORY_MOVE_COST
26349 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
26350 #undef TARGET_RTX_COSTS
26351 #define TARGET_RTX_COSTS ix86_rtx_costs
26352 #undef TARGET_ADDRESS_COST
26353 #define TARGET_ADDRESS_COST ix86_address_cost
26355 #undef TARGET_OVERLAP_OP_BY_PIECES_P
26356 #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
26358 #undef TARGET_FLAGS_REGNUM
26359 #define TARGET_FLAGS_REGNUM FLAGS_REG
26360 #undef TARGET_FIXED_CONDITION_CODE_REGS
26361 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
26362 #undef TARGET_CC_MODES_COMPATIBLE
26363 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
26365 #undef TARGET_MACHINE_DEPENDENT_REORG
26366 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
26368 #undef TARGET_BUILD_BUILTIN_VA_LIST
26369 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
26371 #undef TARGET_FOLD_BUILTIN
26372 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
26374 #undef TARGET_GIMPLE_FOLD_BUILTIN
26375 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
26377 #undef TARGET_COMPARE_VERSION_PRIORITY
26378 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
26380 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
26381 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
26382 ix86_generate_version_dispatcher_body
26384 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
26385 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
26386 ix86_get_function_versions_dispatcher
26388 #undef TARGET_ENUM_VA_LIST_P
26389 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
26391 #undef TARGET_FN_ABI_VA_LIST
26392 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
26394 #undef TARGET_CANONICAL_VA_LIST_TYPE
26395 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
26397 #undef TARGET_EXPAND_BUILTIN_VA_START
26398 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
26400 #undef TARGET_MD_ASM_ADJUST
26401 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
26403 #undef TARGET_C_EXCESS_PRECISION
26404 #define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
26405 #undef TARGET_C_BITINT_TYPE_INFO
26406 #define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info
26407 #undef TARGET_PROMOTE_PROTOTYPES
26408 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
26409 #undef TARGET_PUSH_ARGUMENT
26410 #define TARGET_PUSH_ARGUMENT ix86_push_argument
26411 #undef TARGET_SETUP_INCOMING_VARARGS
26412 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
26413 #undef TARGET_MUST_PASS_IN_STACK
26414 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
26415 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
26416 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
26417 #undef TARGET_FUNCTION_ARG_ADVANCE
26418 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
26419 #undef TARGET_FUNCTION_ARG
26420 #define TARGET_FUNCTION_ARG ix86_function_arg
26421 #undef TARGET_INIT_PIC_REG
26422 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
26423 #undef TARGET_USE_PSEUDO_PIC_REG
26424 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
26425 #undef TARGET_FUNCTION_ARG_BOUNDARY
26426 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
26427 #undef TARGET_PASS_BY_REFERENCE
26428 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
26429 #undef TARGET_INTERNAL_ARG_POINTER
26430 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
26431 #undef TARGET_UPDATE_STACK_BOUNDARY
26432 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
26433 #undef TARGET_GET_DRAP_RTX
26434 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
26435 #undef TARGET_STRICT_ARGUMENT_NAMING
26436 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
26437 #undef TARGET_STATIC_CHAIN
26438 #define TARGET_STATIC_CHAIN ix86_static_chain
26439 #undef TARGET_TRAMPOLINE_INIT
26440 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
26441 #undef TARGET_RETURN_POPS_ARGS
26442 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
26444 #undef TARGET_WARN_FUNC_RETURN
26445 #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
26447 #undef TARGET_LEGITIMATE_COMBINED_INSN
26448 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
26450 #undef TARGET_ASAN_SHADOW_OFFSET
26451 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
26453 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
26454 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
26456 #undef TARGET_SCALAR_MODE_SUPPORTED_P
26457 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
26459 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
26460 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
26461 ix86_libgcc_floating_mode_supported_p
26463 #undef TARGET_VECTOR_MODE_SUPPORTED_P
26464 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
26466 #undef TARGET_C_MODE_FOR_SUFFIX
26467 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
26479 #undef TARGET_MANGLE_TYPE
26480 #define TARGET_MANGLE_TYPE ix86_mangle_type
26482 #undef TARGET_EMIT_SUPPORT_TINFOS
26483 #define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos
26485 #undef TARGET_STACK_PROTECT_GUARD
26486 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif
26493 #undef TARGET_FUNCTION_VALUE
26494 #define TARGET_FUNCTION_VALUE ix86_function_value
26496 #undef TARGET_FUNCTION_VALUE_REGNO_P
26497 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
26499 #undef TARGET_ZERO_CALL_USED_REGS
26500 #define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
26502 #undef TARGET_PROMOTE_FUNCTION_MODE
26503 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
26505 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
26506 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
26508 #undef TARGET_MEMBER_TYPE_FORCES_BLK
26509 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
26511 #undef TARGET_INSTANTIATE_DECLS
26512 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
26514 #undef TARGET_SECONDARY_RELOAD
26515 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
26516 #undef TARGET_SECONDARY_MEMORY_NEEDED
26517 #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
26518 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
26519 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
26521 #undef TARGET_CLASS_MAX_NREGS
26522 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
26524 #undef TARGET_PREFERRED_RELOAD_CLASS
26525 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
26526 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
26527 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
26528 #undef TARGET_CLASS_LIKELY_SPILLED_P
26529 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
26531 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
26532 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
26533 ix86_builtin_vectorization_cost
26534 #undef TARGET_VECTORIZE_VEC_PERM_CONST
26535 #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
26536 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
26537 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
26538 ix86_preferred_simd_mode
26539 #undef TARGET_VECTORIZE_SPLIT_REDUCTION
26540 #define TARGET_VECTORIZE_SPLIT_REDUCTION \
26541 ix86_split_reduction
26542 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
26543 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
26544 ix86_autovectorize_vector_modes
26545 #undef TARGET_VECTORIZE_GET_MASK_MODE
26546 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
26547 #undef TARGET_VECTORIZE_CREATE_COSTS
26548 #define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs
26550 #undef TARGET_SET_CURRENT_FUNCTION
26551 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
26553 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
26554 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
26556 #undef TARGET_OPTION_SAVE
26557 #define TARGET_OPTION_SAVE ix86_function_specific_save
26559 #undef TARGET_OPTION_RESTORE
26560 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
26562 #undef TARGET_OPTION_POST_STREAM_IN
26563 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
26565 #undef TARGET_OPTION_PRINT
26566 #define TARGET_OPTION_PRINT ix86_function_specific_print
26568 #undef TARGET_OPTION_FUNCTION_VERSIONS
26569 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
26571 #undef TARGET_CAN_INLINE_P
26572 #define TARGET_CAN_INLINE_P ix86_can_inline_p
26574 #undef TARGET_LEGITIMATE_ADDRESS_P
26575 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
26577 #undef TARGET_REGISTER_PRIORITY
26578 #define TARGET_REGISTER_PRIORITY ix86_register_priority
26580 #undef TARGET_REGISTER_USAGE_LEVELING_P
26581 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
26583 #undef TARGET_LEGITIMATE_CONSTANT_P
26584 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
26586 #undef TARGET_COMPUTE_FRAME_LAYOUT
26587 #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
26589 #undef TARGET_FRAME_POINTER_REQUIRED
26590 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
26592 #undef TARGET_CAN_ELIMINATE
26593 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
26595 #undef TARGET_EXTRA_LIVE_ON_ENTRY
26596 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
26598 #undef TARGET_ASM_CODE_END
26599 #define TARGET_ASM_CODE_END ix86_code_end
26601 #undef TARGET_CONDITIONAL_REGISTER_USAGE
26602 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
26604 #undef TARGET_CANONICALIZE_COMPARISON
26605 #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
26607 #undef TARGET_LOOP_UNROLL_ADJUST
26608 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
26610 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
26611 #undef TARGET_SPILL_CLASS
26612 #define TARGET_SPILL_CLASS ix86_spill_class
26614 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
26615 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
26616 ix86_simd_clone_compute_vecsize_and_simdlen
26618 #undef TARGET_SIMD_CLONE_ADJUST
26619 #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
26621 #undef TARGET_SIMD_CLONE_USABLE
26622 #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
26624 #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
26625 #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
26627 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
26628 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
26629 ix86_float_exceptions_rounding_supported_p
26631 #undef TARGET_MODE_EMIT
26632 #define TARGET_MODE_EMIT ix86_emit_mode_set
26634 #undef TARGET_MODE_NEEDED
26635 #define TARGET_MODE_NEEDED ix86_mode_needed
26637 #undef TARGET_MODE_AFTER
26638 #define TARGET_MODE_AFTER ix86_mode_after
26640 #undef TARGET_MODE_ENTRY
26641 #define TARGET_MODE_ENTRY ix86_mode_entry
26643 #undef TARGET_MODE_EXIT
26644 #define TARGET_MODE_EXIT ix86_mode_exit
26646 #undef TARGET_MODE_PRIORITY
26647 #define TARGET_MODE_PRIORITY ix86_mode_priority
26649 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
26650 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
26652 #undef TARGET_OFFLOAD_OPTIONS
26653 #define TARGET_OFFLOAD_OPTIONS \
26654 ix86_offload_options
26656 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
26657 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
26659 #undef TARGET_OPTAB_SUPPORTED_P
26660 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
26662 #undef TARGET_HARD_REGNO_SCRATCH_OK
26663 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
26665 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
26666 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS X86_CUSTOM_FUNCTION_TEST
26668 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
26669 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
26671 #undef TARGET_INIT_LIBFUNCS
26672 #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
26674 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
26675 #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
26677 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
26678 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
26680 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
26681 #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
26683 #undef TARGET_HARD_REGNO_NREGS
26684 #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
26685 #undef TARGET_HARD_REGNO_MODE_OK
26686 #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
26688 #undef TARGET_MODES_TIEABLE_P
26689 #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
26691 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
26692 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
26693 ix86_hard_regno_call_part_clobbered
26695 #undef TARGET_INSN_CALLEE_ABI
26696 #define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi
26698 #undef TARGET_CAN_CHANGE_MODE_CLASS
26699 #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
26701 #undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
26702 #define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment
26704 #undef TARGET_STATIC_RTX_ALIGNMENT
26705 #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
26706 #undef TARGET_CONSTANT_ALIGNMENT
26707 #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
26709 #undef TARGET_EMPTY_RECORD_P
26710 #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
26712 #undef TARGET_WARN_PARAMETER_PASSING_ABI
26713 #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
26715 #undef TARGET_GET_MULTILIB_ABI_NAME
26716 #define TARGET_GET_MULTILIB_ABI_NAME \
26717 ix86_get_multilib_abi_name
26719 #undef TARGET_IFUNC_REF_LOCAL_OK
26720 #define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok
#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_ASM_RELOC_RW_MASK
# define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
#endif
26727 #undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
26728 #define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses
26730 #undef TARGET_MEMTAG_ADD_TAG
26731 #define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag
26733 #undef TARGET_MEMTAG_SET_TAG
26734 #define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag
26736 #undef TARGET_MEMTAG_EXTRACT_TAG
26737 #define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag
26739 #undef TARGET_MEMTAG_UNTAGGED_POINTER
26740 #define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer
26742 #undef TARGET_MEMTAG_TAG_SIZE
26743 #define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size
static bool
ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
{
#ifdef OPTION_GLIBC
  if (OPTION_GLIBC)
    return (built_in_function) fcode == BUILT_IN_MEMPCPY;
  else
    return false;
#else
  return false;
#endif
}
26758 #undef TARGET_LIBC_HAS_FAST_FUNCTION
26759 #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
/* Implement TARGET_LIBM_FUNCTION_MAX_ERROR.  */

static unsigned
ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
			      bool boundary_p)
{
#ifdef OPTION_GLIBC
  bool glibc_p = OPTION_GLIBC;
#else
  bool glibc_p = false;
#endif
  if (glibc_p)
    {
      /* If __FAST_MATH__ is defined, glibc provides libmvec.  */
      unsigned int libmvec_ret = 0;
      if (!flag_trapping_math
	  && flag_unsafe_math_optimizations
	  && flag_finite_math_only
	  && !flag_signed_zeros
	  && !flag_errno_math)
	switch (cfn)
	  {
	  CASE_CFN_COS:
	  CASE_CFN_SIN:
	    if (!boundary_p)
	      {
		/* With non-default rounding modes, libmvec provides
		   complete garbage in results.  E.g.
		   _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
		   returns 0.00333309174f rather than 1.40129846e-45f.  */
		if (flag_rounding_math)
		  return ~0U;
		/* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
		   claims libmvec maximum error is 4ulps.
		   My own random testing indicates 2ulps for SFmode and
		   0.5ulps for DFmode, but let's go with the 4ulps.  */
		libmvec_ret = 4;
	      }
	    break;
	  default:
	    break;
	  }
      unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode,
							      boundary_p);
      return MAX (ret, libmvec_ret);
    }
  return default_libm_function_max_error (cfn, mode, boundary_p);
}
26810 #undef TARGET_LIBM_FUNCTION_MAX_ERROR
26811 #define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error
#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
#endif /* #if CHECKING_P */
struct gcc_target targetm = TARGET_INITIALIZER;
26820 #include "gt-i386.h"