1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2020 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #define IN_TARGET_CODE 1
24 #include "coretypes.h"
34 #include "stringpool.h"
41 #include "diagnostic.h"
44 #include "fold-const.h"
47 #include "stor-layout.h"
50 #include "insn-attr.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
61 #include "tm-constrs.h"
63 #include "sched-int.h"
65 #include "tree-pass.h"
67 #include "pass_manager.h"
68 #include "target-globals.h"
69 #include "gimple-iterator.h"
70 #include "tree-vectorizer.h"
71 #include "shrink-wrap.h"
74 #include "tree-iterator.h"
76 #include "case-cfn-macros.h"
78 #include "fold-const-call.h"
80 #include "tree-ssanames.h"
82 #include "selftest-rtl.h"
83 #include "print-rtl.h"
86 #include "symbol-summary.h"
88 #include "ipa-fnsummary.h"
89 #include "wide-int-bitmask.h"
90 #include "tree-vector-builder.h"
92 #include "dwarf2out.h"
93 #include "i386-options.h"
94 #include "i386-builtins.h"
95 #include "i386-expand.h"
96 #include "i386-features.h"
97 #include "function-abi.h"
99 /* This file should be included last. */
100 #include "target-def.h"
102 static rtx
legitimize_dllimport_symbol (rtx
, bool);
103 static rtx
legitimize_pe_coff_extern_decl (rtx
, bool);
104 static void ix86_print_operand_address_as (FILE *, rtx
, addr_space_t
, bool);
105 static void ix86_emit_restore_reg_using_pop (rtx
);
108 #ifndef CHECK_STACK_LIMIT
109 #define CHECK_STACK_LIMIT (-1)
112 /* Return index of given mode in mult and division cost tables. */
113 #define MODE_INDEX(mode) \
114 ((mode) == QImode ? 0 \
115 : (mode) == HImode ? 1 \
116 : (mode) == SImode ? 2 \
117 : (mode) == DImode ? 3 \
122 const struct processor_costs
*ix86_tune_cost
= NULL
;
124 /* Set by -mtune or -Os. */
125 const struct processor_costs
*ix86_cost
= NULL
;
127 /* In case the average insn count for single function invocation is
128 lower than this constant, emit fast (but longer) prologue and
130 #define FAST_PROLOGUE_INSN_COUNT 20
132 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
133 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
134 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
135 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
137 /* Array of the smallest class containing reg number REGNO, indexed by
138 REGNO. Used by REGNO_REG_CLASS in i386.h. */
140 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
143 AREG
, DREG
, CREG
, BREG
,
145 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
147 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
148 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
149 /* arg pointer, flags, fpsr, frame */
150 NON_Q_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
152 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
153 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
155 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
156 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
158 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
159 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
160 /* SSE REX registers */
161 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
162 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
163 /* AVX-512 SSE registers */
164 ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
,
165 ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
,
166 ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
,
167 ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
,
168 /* Mask registers. */
169 ALL_MASK_REGS
, MASK_REGS
, MASK_REGS
, MASK_REGS
,
170 MASK_REGS
, MASK_REGS
, MASK_REGS
, MASK_REGS
173 /* The "default" register map used in 32bit mode. */
175 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
178 0, 2, 1, 3, 6, 7, 4, 5,
180 12, 13, 14, 15, 16, 17, 18, 19,
181 /* arg, flags, fpsr, frame */
182 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
183 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
185 21, 22, 23, 24, 25, 26, 27, 28,
187 29, 30, 31, 32, 33, 34, 35, 36,
188 /* extended integer registers */
189 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
190 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
191 /* extended sse registers */
192 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
193 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
194 /* AVX-512 registers 16-23 */
195 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
196 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
197 /* AVX-512 registers 24-31 */
198 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
199 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
201 93, 94, 95, 96, 97, 98, 99, 100
204 /* The "default" register map used in 64bit mode. */
206 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
209 0, 1, 2, 3, 4, 5, 6, 7,
211 33, 34, 35, 36, 37, 38, 39, 40,
212 /* arg, flags, fpsr, frame */
213 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
214 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
216 17, 18, 19, 20, 21, 22, 23, 24,
218 41, 42, 43, 44, 45, 46, 47, 48,
219 /* extended integer registers */
220 8, 9, 10, 11, 12, 13, 14, 15,
221 /* extended SSE registers */
222 25, 26, 27, 28, 29, 30, 31, 32,
223 /* AVX-512 registers 16-23 */
224 67, 68, 69, 70, 71, 72, 73, 74,
225 /* AVX-512 registers 24-31 */
226 75, 76, 77, 78, 79, 80, 81, 82,
228 118, 119, 120, 121, 122, 123, 124, 125
231 /* Define the register numbers to be used in Dwarf debugging information.
232 The SVR4 reference port C compiler uses the following register numbers
233 in its Dwarf output code:
234 0 for %eax (gcc regno = 0)
235 1 for %ecx (gcc regno = 2)
236 2 for %edx (gcc regno = 1)
237 3 for %ebx (gcc regno = 3)
238 4 for %esp (gcc regno = 7)
239 5 for %ebp (gcc regno = 6)
240 6 for %esi (gcc regno = 4)
241 7 for %edi (gcc regno = 5)
242 The following three DWARF register numbers are never generated by
243 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
244 believed these numbers have these meanings.
245 8 for %eip (no gcc equivalent)
246 9 for %eflags (gcc regno = 17)
247 10 for %trapno (no gcc equivalent)
248 It is not at all clear how we should number the FP stack registers
249 for the x86 architecture. If the version of SDB on x86/svr4 were
250 a bit less brain dead with respect to floating-point then we would
251 have a precedent to follow with respect to DWARF register numbers
252 for x86 FP registers, but the SDB on x86/svr4 was so completely
253 broken with respect to FP registers that it is hardly worth thinking
254 of it as something to strive for compatibility with.
255 The version of x86/svr4 SDB I had does (partially)
256 seem to believe that DWARF register number 11 is associated with
257 the x86 register %st(0), but that's about all. Higher DWARF
258 register numbers don't seem to be associated with anything in
259 particular, and even for DWARF regno 11, SDB only seemed to under-
260 stand that it should say that a variable lives in %st(0) (when
261 asked via an `=' command) if we said it was in DWARF regno 11,
262 but SDB still printed garbage when asked for the value of the
263 variable in question (via a `/' command).
264 (Also note that the labels SDB printed for various FP stack regs
265 when doing an `x' command were all wrong.)
266 Note that these problems generally don't affect the native SVR4
267 C compiler because it doesn't allow the use of -O with -g and
268 because when it is *not* optimizing, it allocates a memory
269 location for each floating-point variable, and the memory
270 location is what gets described in the DWARF AT_location
271 attribute for the variable in question.
272 Regardless of the severe mental illness of the x86/svr4 SDB, we
273 do something sensible here and we use the following DWARF
274 register numbers. Note that these are all stack-top-relative
276 11 for %st(0) (gcc regno = 8)
277 12 for %st(1) (gcc regno = 9)
278 13 for %st(2) (gcc regno = 10)
279 14 for %st(3) (gcc regno = 11)
280 15 for %st(4) (gcc regno = 12)
281 16 for %st(5) (gcc regno = 13)
282 17 for %st(6) (gcc regno = 14)
283 18 for %st(7) (gcc regno = 15)
285 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
288 0, 2, 1, 3, 6, 7, 5, 4,
290 11, 12, 13, 14, 15, 16, 17, 18,
291 /* arg, flags, fpsr, frame */
292 IGNORED_DWARF_REGNUM
, 9,
293 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
295 21, 22, 23, 24, 25, 26, 27, 28,
297 29, 30, 31, 32, 33, 34, 35, 36,
298 /* extended integer registers */
299 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
300 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
301 /* extended sse registers */
302 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
303 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
304 /* AVX-512 registers 16-23 */
305 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
306 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
307 /* AVX-512 registers 24-31 */
308 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
309 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
311 93, 94, 95, 96, 97, 98, 99, 100
314 /* Define parameter passing and return registers. */
316 static int const x86_64_int_parameter_registers
[6] =
318 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
321 static int const x86_64_ms_abi_int_parameter_registers
[4] =
323 CX_REG
, DX_REG
, R8_REG
, R9_REG
326 static int const x86_64_int_return_registers
[4] =
328 AX_REG
, DX_REG
, DI_REG
, SI_REG
331 /* Define the structure for the machine field in struct function. */
333 struct GTY(()) stack_local_entry
{
337 struct stack_local_entry
*next
;
340 /* Which cpu are we scheduling for. */
341 enum attr_cpu ix86_schedule
;
343 /* Which cpu are we optimizing for. */
344 enum processor_type ix86_tune
;
346 /* Which instruction set architecture to use. */
347 enum processor_type ix86_arch
;
349 /* True if processor has SSE prefetch instruction. */
350 unsigned char x86_prefetch_sse
;
352 /* Preferred alignment for stack boundary in bits. */
353 unsigned int ix86_preferred_stack_boundary
;
355 /* Alignment for incoming stack boundary in bits specified at
357 unsigned int ix86_user_incoming_stack_boundary
;
359 /* Default alignment for incoming stack boundary in bits. */
360 unsigned int ix86_default_incoming_stack_boundary
;
362 /* Alignment for incoming stack boundary in bits. */
363 unsigned int ix86_incoming_stack_boundary
;
365 /* Calling abi specific va_list type nodes. */
366 tree sysv_va_list_type_node
;
367 tree ms_va_list_type_node
;
369 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
370 char internal_label_prefix
[16];
371 int internal_label_prefix_len
;
373 /* Fence to use after loop using movnt. */
376 /* Register class used for passing given 64bit part of the argument.
377 These represent classes as documented by the PS ABI, with the exception
378 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
379 use SF or DFmode move instead of DImode to avoid reformatting penalties.
381 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
382 whenever possible (upper half does contain padding). */
383 enum x86_64_reg_class
386 X86_64_INTEGER_CLASS
,
387 X86_64_INTEGERSI_CLASS
,
394 X86_64_COMPLEX_X87_CLASS
,
398 #define MAX_CLASSES 8
400 /* Table of constants used by fldpi, fldln2, etc.... */
401 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
402 static bool ext_80387_constants_init
;
405 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
406 static bool ix86_function_value_regno_p (const unsigned int);
407 static unsigned int ix86_function_arg_boundary (machine_mode
,
409 static rtx
ix86_static_chain (const_tree
, bool);
410 static int ix86_function_regparm (const_tree
, const_tree
);
411 static void ix86_compute_frame_layout (void);
412 static tree
ix86_canonical_va_list_type (tree
);
413 static unsigned int split_stack_prologue_scratch_regno (void);
414 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
416 static bool ix86_can_inline_p (tree
, tree
);
417 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
420 /* Whether -mtune= or -march= were specified */
421 int ix86_tune_defaulted
;
422 int ix86_arch_specified
;
424 /* Return true if a red-zone is in use. We can't use red-zone when
425 there are local indirect jumps, like "indirect_jump" or "tablejump",
426 which jumps to another place in the function, since "call" in the
427 indirect thunk pushes the return address onto stack, destroying
430 TODO: If we can reserve the first 2 WORDs, for PUSH and, another
431 for CALL, in red-zone, we can allow local indirect jumps with
435 ix86_using_red_zone (void)
437 return (TARGET_RED_ZONE
438 && !TARGET_64BIT_MS_ABI
439 && (!cfun
->machine
->has_local_indirect_jump
440 || cfun
->machine
->indirect_branch_type
== indirect_branch_keep
));
443 /* Return true, if profiling code should be emitted before
444 prologue. Otherwise it returns false.
445 Note: For x86 with "hotfix" it is sorried. */
447 ix86_profile_before_prologue (void)
449 return flag_fentry
!= 0;
452 /* Update register usage after having seen the compiler flags. */
455 ix86_conditional_register_usage (void)
459 /* If there are no caller-saved registers, preserve all registers.
460 except fixed_regs and registers used for function return value
461 since aggregate_value_p checks call_used_regs[regno] on return
463 if (cfun
&& cfun
->machine
->no_caller_saved_registers
)
464 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
465 if (!fixed_regs
[i
] && !ix86_function_value_regno_p (i
))
466 call_used_regs
[i
] = 0;
468 /* For 32-bit targets, disable the REX registers. */
471 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
472 CLEAR_HARD_REG_BIT (accessible_reg_set
, i
);
473 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
474 CLEAR_HARD_REG_BIT (accessible_reg_set
, i
);
475 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
476 CLEAR_HARD_REG_BIT (accessible_reg_set
, i
);
479 /* See the definition of CALL_USED_REGISTERS in i386.h. */
480 c_mask
= CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI
);
482 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
484 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
486 /* Set/reset conditionally defined registers from
487 CALL_USED_REGISTERS initializer. */
488 if (call_used_regs
[i
] > 1)
489 call_used_regs
[i
] = !!(call_used_regs
[i
] & c_mask
);
491 /* Calculate registers of CLOBBERED_REGS register set
492 as call used registers from GENERAL_REGS register set. */
493 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
494 && call_used_regs
[i
])
495 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
498 /* If MMX is disabled, disable the registers. */
500 accessible_reg_set
&= ~reg_class_contents
[MMX_REGS
];
502 /* If SSE is disabled, disable the registers. */
504 accessible_reg_set
&= ~reg_class_contents
[ALL_SSE_REGS
];
506 /* If the FPU is disabled, disable the registers. */
507 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
508 accessible_reg_set
&= ~reg_class_contents
[FLOAT_REGS
];
510 /* If AVX512F is disabled, disable the registers. */
511 if (! TARGET_AVX512F
)
513 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
514 CLEAR_HARD_REG_BIT (accessible_reg_set
, i
);
516 accessible_reg_set
&= ~reg_class_contents
[ALL_MASK_REGS
];
520 /* Canonicalize a comparison from one we don't have to one we do have. */
523 ix86_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
524 bool op0_preserve_value
)
526 /* The order of operands in x87 ficom compare is forced by combine in
527 simplify_comparison () function. Float operator is treated as RTX_OBJ
528 with a precedence over other operators and is always put in the first
529 place. Swap condition and operands to match ficom instruction. */
530 if (!op0_preserve_value
531 && GET_CODE (*op0
) == FLOAT
&& MEM_P (XEXP (*op0
, 0)) && REG_P (*op1
))
533 enum rtx_code scode
= swap_condition ((enum rtx_code
) *code
);
535 /* We are called only for compares that are split to SAHF instruction.
536 Ensure that we have setcc/jcc insn for the swapped condition. */
537 if (ix86_fp_compare_code_to_integer (scode
) != UNKNOWN
)
539 std::swap (*op0
, *op1
);
546 /* Hook to determine if one function can safely inline another. */
549 ix86_can_inline_p (tree caller
, tree callee
)
551 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
552 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
554 /* Changes of those flags can be tolerated for always inlines. Lets hope
555 user knows what he is doing. */
556 const unsigned HOST_WIDE_INT always_inline_safe_mask
557 = (MASK_USE_8BIT_IDIV
| MASK_ACCUMULATE_OUTGOING_ARGS
558 | MASK_NO_ALIGN_STRINGOPS
| MASK_AVX256_SPLIT_UNALIGNED_LOAD
559 | MASK_AVX256_SPLIT_UNALIGNED_STORE
| MASK_CLD
560 | MASK_NO_FANCY_MATH_387
| MASK_IEEE_FP
| MASK_INLINE_ALL_STRINGOPS
561 | MASK_INLINE_STRINGOPS_DYNAMICALLY
| MASK_RECIP
| MASK_STACK_PROBE
562 | MASK_STV
| MASK_TLS_DIRECT_SEG_REFS
| MASK_VZEROUPPER
563 | MASK_NO_PUSH_ARGS
| MASK_OMIT_LEAF_FRAME_POINTER
);
567 callee_tree
= target_option_default_node
;
569 caller_tree
= target_option_default_node
;
570 if (callee_tree
== caller_tree
)
573 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
574 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
577 = (DECL_DISREGARD_INLINE_LIMITS (callee
)
578 && lookup_attribute ("always_inline",
579 DECL_ATTRIBUTES (callee
)));
581 cgraph_node
*callee_node
= cgraph_node::get (callee
);
582 /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
583 function can inline a SSE2 function but a SSE2 function can't inline
585 if (((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
586 != callee_opts
->x_ix86_isa_flags
)
587 || ((caller_opts
->x_ix86_isa_flags2
& callee_opts
->x_ix86_isa_flags2
)
588 != callee_opts
->x_ix86_isa_flags2
))
591 /* See if we have the same non-isa options. */
592 else if ((!always_inline
593 && caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
594 || (caller_opts
->x_target_flags
& ~always_inline_safe_mask
)
595 != (callee_opts
->x_target_flags
& ~always_inline_safe_mask
))
598 /* See if arch, tune, etc. are the same. */
599 else if (caller_opts
->arch
!= callee_opts
->arch
)
602 else if (!always_inline
&& caller_opts
->tune
!= callee_opts
->tune
)
605 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
606 /* If the calle doesn't use FP expressions differences in
607 ix86_fpmath can be ignored. We are called from FEs
608 for multi-versioning call optimization, so beware of
609 ipa_fn_summaries not available. */
610 && (! ipa_fn_summaries
611 || ipa_fn_summaries
->get (callee_node
) == NULL
612 || ipa_fn_summaries
->get (callee_node
)->fp_expressions
))
615 else if (!always_inline
616 && caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
625 /* Return true if this goes in large data/bss. */
628 ix86_in_large_data_p (tree exp
)
630 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
633 if (exp
== NULL_TREE
)
636 /* Functions are never large data. */
637 if (TREE_CODE (exp
) == FUNCTION_DECL
)
640 /* Automatic variables are never large data. */
641 if (VAR_P (exp
) && !is_global_var (exp
))
644 if (VAR_P (exp
) && DECL_SECTION_NAME (exp
))
646 const char *section
= DECL_SECTION_NAME (exp
);
647 if (strcmp (section
, ".ldata") == 0
648 || strcmp (section
, ".lbss") == 0)
654 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
656 /* If this is an incomplete type with size 0, then we can't put it
657 in data because it might be too big when completed. Also,
658 int_size_in_bytes returns -1 if size can vary or is larger than
659 an integer in which case also it is safer to assume that it goes in
661 if (size
<= 0 || size
> ix86_section_threshold
)
668 /* i386-specific section flag to mark large sections. */
669 #define SECTION_LARGE SECTION_MACH_DEP
671 /* Switch to the appropriate section for output of DECL.
672 DECL is either a `VAR_DECL' node or a constant of some sort.
673 RELOC indicates whether forming the initial value of DECL requires
674 link-time relocations. */
676 ATTRIBUTE_UNUSED
static section
*
677 x86_64_elf_select_section (tree decl
, int reloc
,
678 unsigned HOST_WIDE_INT align
)
680 if (ix86_in_large_data_p (decl
))
682 const char *sname
= NULL
;
683 unsigned int flags
= SECTION_WRITE
| SECTION_LARGE
;
684 switch (categorize_decl_for_section (decl
, reloc
))
689 case SECCAT_DATA_REL
:
690 sname
= ".ldata.rel";
692 case SECCAT_DATA_REL_LOCAL
:
693 sname
= ".ldata.rel.local";
695 case SECCAT_DATA_REL_RO
:
696 sname
= ".ldata.rel.ro";
698 case SECCAT_DATA_REL_RO_LOCAL
:
699 sname
= ".ldata.rel.ro.local";
703 flags
|= SECTION_BSS
;
706 case SECCAT_RODATA_MERGE_STR
:
707 case SECCAT_RODATA_MERGE_STR_INIT
:
708 case SECCAT_RODATA_MERGE_CONST
:
710 flags
&= ~SECTION_WRITE
;
719 /* We don't split these for medium model. Place them into
720 default sections and hope for best. */
725 /* We might get called with string constants, but get_named_section
726 doesn't like them as they are not DECLs. Also, we need to set
727 flags in that case. */
729 return get_section (sname
, flags
, NULL
);
730 return get_named_section (decl
, sname
, reloc
);
733 return default_elf_select_section (decl
, reloc
, align
);
736 /* Select a set of attributes for section NAME based on the properties
737 of DECL and whether or not RELOC indicates that DECL's initializer
738 might contain runtime relocations. */
740 static unsigned int ATTRIBUTE_UNUSED
741 x86_64_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
743 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
745 if (ix86_in_large_data_p (decl
))
746 flags
|= SECTION_LARGE
;
748 if (decl
== NULL_TREE
749 && (strcmp (name
, ".ldata.rel.ro") == 0
750 || strcmp (name
, ".ldata.rel.ro.local") == 0))
751 flags
|= SECTION_RELRO
;
753 if (strcmp (name
, ".lbss") == 0
754 || strncmp (name
, ".lbss.", sizeof (".lbss.") - 1) == 0
755 || strncmp (name
, ".gnu.linkonce.lb.",
756 sizeof (".gnu.linkonce.lb.") - 1) == 0)
757 flags
|= SECTION_BSS
;
762 /* Build up a unique section name, expressed as a
763 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
764 RELOC indicates whether the initial value of EXP requires
765 link-time relocations. */
767 static void ATTRIBUTE_UNUSED
768 x86_64_elf_unique_section (tree decl
, int reloc
)
770 if (ix86_in_large_data_p (decl
))
772 const char *prefix
= NULL
;
773 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
774 bool one_only
= DECL_COMDAT_GROUP (decl
) && !HAVE_COMDAT_GROUP
;
776 switch (categorize_decl_for_section (decl
, reloc
))
779 case SECCAT_DATA_REL
:
780 case SECCAT_DATA_REL_LOCAL
:
781 case SECCAT_DATA_REL_RO
:
782 case SECCAT_DATA_REL_RO_LOCAL
:
783 prefix
= one_only
? ".ld" : ".ldata";
786 prefix
= one_only
? ".lb" : ".lbss";
789 case SECCAT_RODATA_MERGE_STR
:
790 case SECCAT_RODATA_MERGE_STR_INIT
:
791 case SECCAT_RODATA_MERGE_CONST
:
792 prefix
= one_only
? ".lr" : ".lrodata";
801 /* We don't split these for medium model. Place them into
802 default sections and hope for best. */
807 const char *name
, *linkonce
;
810 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
811 name
= targetm
.strip_name_encoding (name
);
813 /* If we're using one_only, then there needs to be a .gnu.linkonce
814 prefix to the section name. */
815 linkonce
= one_only
? ".gnu.linkonce" : "";
817 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
819 set_decl_section_name (decl
, string
);
823 default_unique_section (decl
, reloc
);
828 #ifndef LARGECOMM_SECTION_ASM_OP
829 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
832 /* This says how to output assembler code to declare an
833 uninitialized external linkage data object.
835 For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for
838 x86_elf_aligned_decl_common (FILE *file
, tree decl
,
839 const char *name
, unsigned HOST_WIDE_INT size
,
842 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
843 && size
> (unsigned int)ix86_section_threshold
)
845 switch_to_section (get_named_section (decl
, ".lbss", 0));
846 fputs (LARGECOMM_SECTION_ASM_OP
, file
);
849 fputs (COMMON_ASM_OP
, file
);
850 assemble_name (file
, name
);
851 fprintf (file
, "," HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
852 size
, align
/ BITS_PER_UNIT
);
856 /* Utility function for targets to use in implementing
857 ASM_OUTPUT_ALIGNED_BSS. */
860 x86_output_aligned_bss (FILE *file
, tree decl
, const char *name
,
861 unsigned HOST_WIDE_INT size
, int align
)
863 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
864 && size
> (unsigned int)ix86_section_threshold
)
865 switch_to_section (get_named_section (decl
, ".lbss", 0));
867 switch_to_section (bss_section
);
868 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
869 #ifdef ASM_DECLARE_OBJECT_NAME
870 last_assemble_variable_decl
= decl
;
871 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
873 /* Standard thing is just output label for the object. */
874 ASM_OUTPUT_LABEL (file
, name
);
875 #endif /* ASM_DECLARE_OBJECT_NAME */
876 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
879 /* Decide whether we must probe the stack before any space allocation
880 on this target. It's essentially TARGET_STACK_PROBE except when
881 -fstack-check causes the stack to be already probed differently. */
884 ix86_target_stack_probe (void)
886 /* Do not probe the stack twice if static stack checking is enabled. */
887 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
890 return TARGET_STACK_PROBE
;
893 /* Decide whether we can make a sibling call to a function. DECL is the
894 declaration of the function being targeted by the call and EXP is the
895 CALL_EXPR representing the call. */
898 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
900 tree type
, decl_or_type
;
902 bool bind_global
= decl
&& !targetm
.binds_local_p (decl
);
904 if (ix86_function_naked (current_function_decl
))
907 /* Sibling call isn't OK if there are no caller-saved registers
908 since all registers must be preserved before return. */
909 if (cfun
->machine
->no_caller_saved_registers
)
912 /* If we are generating position-independent code, we cannot sibcall
913 optimize direct calls to global functions, as the PLT requires
914 %ebx be live. (Darwin does not have a PLT.) */
922 /* If we need to align the outgoing stack, then sibcalling would
923 unalign the stack, which may break the called function. */
924 if (ix86_minimum_incoming_stack_boundary (true)
925 < PREFERRED_STACK_BOUNDARY
)
931 type
= TREE_TYPE (decl
);
935 /* We're looking at the CALL_EXPR, we need the type of the function. */
936 type
= CALL_EXPR_FN (exp
); /* pointer expression */
937 type
= TREE_TYPE (type
); /* pointer type */
938 type
= TREE_TYPE (type
); /* function type */
942 /* If outgoing reg parm stack space changes, we cannot do sibcall. */
943 if ((OUTGOING_REG_PARM_STACK_SPACE (type
)
944 != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl
)))
945 || (REG_PARM_STACK_SPACE (decl_or_type
)
946 != REG_PARM_STACK_SPACE (current_function_decl
)))
948 maybe_complain_about_tail_call (exp
,
949 "inconsistent size of stack space"
950 " allocated for arguments which are"
951 " passed in registers");
955 /* Check that the return value locations are the same. Like
956 if we are returning floats on the 80387 register stack, we cannot
957 make a sibcall from a function that doesn't return a float to a
958 function that does or, conversely, from a function that does return
959 a float to a function that doesn't; the necessary stack adjustment
960 would not be executed. This is also the place we notice
961 differences in the return value ABI. Note that it is ok for one
962 of the functions to have void return type as long as the return
963 value of the other is passed in a register. */
964 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
965 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
967 if (STACK_REG_P (a
) || STACK_REG_P (b
))
969 if (!rtx_equal_p (a
, b
))
972 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
974 else if (!rtx_equal_p (a
, b
))
979 /* The SYSV ABI has more call-clobbered registers;
980 disallow sibcalls from MS to SYSV. */
981 if (cfun
->machine
->call_abi
== MS_ABI
982 && ix86_function_type_abi (type
) == SYSV_ABI
)
987 /* If this call is indirect, we'll need to be able to use a
988 call-clobbered register for the address of the target function.
989 Make sure that all such registers are not used for passing
990 parameters. Note that DLLIMPORT functions and call to global
991 function via GOT slot are indirect. */
993 || (bind_global
&& flag_pic
&& !flag_plt
)
994 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
))
995 || flag_force_indirect_call
)
997 /* Check if regparm >= 3 since arg_reg_available is set to
998 false if regparm == 0. If regparm is 1 or 2, there is
999 always a call-clobbered register available.
1001 ??? The symbol indirect call doesn't need a call-clobbered
1002 register. But we don't know if this is a symbol indirect
1003 call or not here. */
1004 if (ix86_function_regparm (type
, decl
) >= 3
1005 && !cfun
->machine
->arg_reg_available
)
1010 /* Otherwise okay. That also includes certain types of indirect calls. */
1014 /* This function determines from TYPE the calling-convention. */
1017 ix86_get_callcvt (const_tree type
)
1019 unsigned int ret
= 0;
1024 return IX86_CALLCVT_CDECL
;
1026 attrs
= TYPE_ATTRIBUTES (type
);
1027 if (attrs
!= NULL_TREE
)
1029 if (lookup_attribute ("cdecl", attrs
))
1030 ret
|= IX86_CALLCVT_CDECL
;
1031 else if (lookup_attribute ("stdcall", attrs
))
1032 ret
|= IX86_CALLCVT_STDCALL
;
1033 else if (lookup_attribute ("fastcall", attrs
))
1034 ret
|= IX86_CALLCVT_FASTCALL
;
1035 else if (lookup_attribute ("thiscall", attrs
))
1036 ret
|= IX86_CALLCVT_THISCALL
;
1038 /* Regparam isn't allowed for thiscall and fastcall. */
1039 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
1041 if (lookup_attribute ("regparm", attrs
))
1042 ret
|= IX86_CALLCVT_REGPARM
;
1043 if (lookup_attribute ("sseregparm", attrs
))
1044 ret
|= IX86_CALLCVT_SSEREGPARM
;
1047 if (IX86_BASE_CALLCVT(ret
) != 0)
1051 is_stdarg
= stdarg_p (type
);
1052 if (TARGET_RTD
&& !is_stdarg
)
1053 return IX86_CALLCVT_STDCALL
| ret
;
1057 || TREE_CODE (type
) != METHOD_TYPE
1058 || ix86_function_type_abi (type
) != MS_ABI
)
1059 return IX86_CALLCVT_CDECL
| ret
;
1061 return IX86_CALLCVT_THISCALL
;
1064 /* Return 0 if the attributes for two types are incompatible, 1 if they
1065 are compatible, and 2 if they are nearly compatible (which causes a
1066 warning to be generated). */
1069 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
1071 unsigned int ccvt1
, ccvt2
;
1073 if (TREE_CODE (type1
) != FUNCTION_TYPE
1074 && TREE_CODE (type1
) != METHOD_TYPE
)
1077 ccvt1
= ix86_get_callcvt (type1
);
1078 ccvt2
= ix86_get_callcvt (type2
);
1081 if (ix86_function_regparm (type1
, NULL
)
1082 != ix86_function_regparm (type2
, NULL
))
1088 /* Return the regparm value for a function with the indicated TYPE and DECL.
1089 DECL may be NULL when calling function indirectly
1090 or considering a libcall. */
1093 ix86_function_regparm (const_tree type
, const_tree decl
)
1100 return (ix86_function_type_abi (type
) == SYSV_ABI
1101 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
1102 ccvt
= ix86_get_callcvt (type
);
1103 regparm
= ix86_regparm
;
1105 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
1107 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
1110 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1114 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
1116 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
1119 /* Use register calling convention for local functions when possible. */
1121 && TREE_CODE (decl
) == FUNCTION_DECL
)
1123 cgraph_node
*target
= cgraph_node::get (decl
);
1125 target
= target
->function_symbol ();
1127 /* Caller and callee must agree on the calling convention, so
1128 checking here just optimize means that with
1129 __attribute__((optimize (...))) caller could use regparm convention
1130 and callee not, or vice versa. Instead look at whether the callee
1131 is optimized or not. */
1132 if (target
&& opt_for_fn (target
->decl
, optimize
)
1133 && !(profile_flag
&& !flag_fentry
))
1135 if (target
->local
&& target
->can_change_signature
)
1137 int local_regparm
, globals
= 0, regno
;
1139 /* Make sure no regparm register is taken by a
1140 fixed register variable. */
1141 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
;
1143 if (fixed_regs
[local_regparm
])
1146 /* We don't want to use regparm(3) for nested functions as
1147 these use a static chain pointer in the third argument. */
1148 if (local_regparm
== 3 && DECL_STATIC_CHAIN (target
->decl
))
1151 /* Save a register for the split stack. */
1152 if (flag_split_stack
)
1154 if (local_regparm
== 3)
1156 else if (local_regparm
== 2
1157 && DECL_STATIC_CHAIN (target
->decl
))
1161 /* Each fixed register usage increases register pressure,
1162 so less registers should be used for argument passing.
1163 This functionality can be overriden by an explicit
1165 for (regno
= AX_REG
; regno
<= DI_REG
; regno
++)
1166 if (fixed_regs
[regno
])
1170 = globals
< local_regparm
? local_regparm
- globals
: 0;
1172 if (local_regparm
> regparm
)
1173 regparm
= local_regparm
;
1181 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
1182 DFmode (2) arguments in SSE registers for a function with the
1183 indicated TYPE and DECL. DECL may be NULL when calling function
1184 indirectly or considering a libcall. Return -1 if any FP parameter
1185 should be rejected by error. This is used in siutation we imply SSE
1186 calling convetion but the function is called from another function with
1187 SSE disabled. Otherwise return 0. */
1190 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
1192 gcc_assert (!TARGET_64BIT
);
1194 /* Use SSE registers to pass SFmode and DFmode arguments if requested
1195 by the sseregparm attribute. */
1196 if (TARGET_SSEREGPARM
1197 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
1204 error ("calling %qD with attribute sseregparm without "
1205 "SSE/SSE2 enabled", decl
);
1207 error ("calling %qT with attribute sseregparm without "
1208 "SSE/SSE2 enabled", type
);
1219 cgraph_node
*target
= cgraph_node::get (decl
);
1221 target
= target
->function_symbol ();
1223 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1224 (and DFmode for SSE2) arguments in SSE registers. */
1226 /* TARGET_SSE_MATH */
1227 && (target_opts_for_fn (target
->decl
)->x_ix86_fpmath
& FPMATH_SSE
)
1228 && opt_for_fn (target
->decl
, optimize
)
1229 && !(profile_flag
&& !flag_fentry
))
1231 if (target
->local
&& target
->can_change_signature
)
1233 /* Refuse to produce wrong code when local function with SSE enabled
1234 is called from SSE disabled function.
1235 FIXME: We need a way to detect these cases cross-ltrans partition
1236 and avoid using SSE calling conventions on local functions called
1237 from function with SSE disabled. For now at least delay the
1238 warning until we know we are going to produce wrong code.
1240 if (!TARGET_SSE
&& warn
)
1242 return TARGET_SSE2_P (target_opts_for_fn (target
->decl
)
1243 ->x_ix86_isa_flags
) ? 2 : 1;
1250 /* Return true if EAX is live at the start of the function. Used by
1251 ix86_expand_prologue to determine if we need special help before
1252 calling allocate_stack_worker. */
1255 ix86_eax_live_at_start_p (void)
1257 /* Cheat. Don't bother working forward from ix86_function_regparm
1258 to the function type to whether an actual argument is located in
1259 eax. Instead just look at cfg info, which is still close enough
1260 to correct at this point. This gives false positives for broken
1261 functions that might use uninitialized data that happens to be
1262 allocated in eax, but who cares? */
1263 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 0);
1267 ix86_keep_aggregate_return_pointer (tree fntype
)
1273 attr
= lookup_attribute ("callee_pop_aggregate_return",
1274 TYPE_ATTRIBUTES (fntype
));
1276 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
1278 /* For 32-bit MS-ABI the default is to keep aggregate
1280 if (ix86_function_type_abi (fntype
) == MS_ABI
)
1283 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
1286 /* Value is the number of bytes of arguments automatically
1287 popped when returning from a subroutine call.
1288 FUNDECL is the declaration node of the function (as a tree),
1289 FUNTYPE is the data type of the function (as a tree),
1290 or for a library call it is an identifier node for the subroutine name.
1291 SIZE is the number of bytes of arguments passed on the stack.
1293 On the 80386, the RTD insn may be used to pop them if the number
1294 of args is fixed, but if the number is variable then the caller
1295 must pop them all. RTD can't be used for library calls now
1296 because the library is compiled with the Unix compiler.
1297 Use of RTD is a selectable option, since it is incompatible with
1298 standard Unix calling sequences. If the option is not selected,
1299 the caller must always pop the args.
1301 The attribute stdcall is equivalent to RTD on a per module basis. */
1304 ix86_return_pops_args (tree fundecl
, tree funtype
, poly_int64 size
)
1308 /* None of the 64-bit ABIs pop arguments. */
1312 ccvt
= ix86_get_callcvt (funtype
);
1314 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
1315 | IX86_CALLCVT_THISCALL
)) != 0
1316 && ! stdarg_p (funtype
))
1319 /* Lose any fake structure return argument if it is passed on the stack. */
1320 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
1321 && !ix86_keep_aggregate_return_pointer (funtype
))
1323 int nregs
= ix86_function_regparm (funtype
, fundecl
);
1325 return GET_MODE_SIZE (Pmode
);
1331 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1334 ix86_legitimate_combined_insn (rtx_insn
*insn
)
1338 /* Check operand constraints in case hard registers were propagated
1339 into insn pattern. This check prevents combine pass from
1340 generating insn patterns with invalid hard register operands.
1341 These invalid insns can eventually confuse reload to error out
1342 with a spill failure. See also PRs 46829 and 46843. */
1344 gcc_assert (INSN_CODE (insn
) >= 0);
1346 extract_insn (insn
);
1347 preprocess_constraints (insn
);
1349 int n_operands
= recog_data
.n_operands
;
1350 int n_alternatives
= recog_data
.n_alternatives
;
1351 for (i
= 0; i
< n_operands
; i
++)
1353 rtx op
= recog_data
.operand
[i
];
1354 machine_mode mode
= GET_MODE (op
);
1355 const operand_alternative
*op_alt
;
1360 /* A unary operator may be accepted by the predicate, but it
1361 is irrelevant for matching constraints. */
1367 if (REG_P (SUBREG_REG (op
))
1368 && REGNO (SUBREG_REG (op
)) < FIRST_PSEUDO_REGISTER
)
1369 offset
= subreg_regno_offset (REGNO (SUBREG_REG (op
)),
1370 GET_MODE (SUBREG_REG (op
)),
1373 op
= SUBREG_REG (op
);
1376 if (!(REG_P (op
) && HARD_REGISTER_P (op
)))
1379 op_alt
= recog_op_alt
;
1381 /* Operand has no constraints, anything is OK. */
1382 win
= !n_alternatives
;
1384 alternative_mask preferred
= get_preferred_alternatives (insn
);
1385 for (j
= 0; j
< n_alternatives
; j
++, op_alt
+= n_operands
)
1387 if (!TEST_BIT (preferred
, j
))
1389 if (op_alt
[i
].anything_ok
1390 || (op_alt
[i
].matches
!= -1
1392 (recog_data
.operand
[i
],
1393 recog_data
.operand
[op_alt
[i
].matches
]))
1394 || reg_fits_class_p (op
, op_alt
[i
].cl
, offset
, mode
))
1408 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1410 static unsigned HOST_WIDE_INT
1411 ix86_asan_shadow_offset (void)
1413 return SUBTARGET_SHADOW_OFFSET
;
1416 /* Argument support functions. */
1418 /* Return true when register may be used to pass function parameters. */
1420 ix86_function_arg_regno_p (int regno
)
1423 enum calling_abi call_abi
;
1424 const int *parm_regs
;
1429 return (regno
< REGPARM_MAX
1430 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
1432 return (regno
< REGPARM_MAX
1433 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
1434 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
1435 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
1436 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
1439 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
1440 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
1443 /* TODO: The function should depend on current function ABI but
1444 builtins.c would need updating then. Therefore we use the
1446 call_abi
= ix86_cfun_abi ();
1448 /* RAX is used as hidden argument to va_arg functions. */
1449 if (call_abi
== SYSV_ABI
&& regno
== AX_REG
)
1452 if (call_abi
== MS_ABI
)
1453 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
1455 parm_regs
= x86_64_int_parameter_registers
;
1457 for (i
= 0; i
< (call_abi
== MS_ABI
1458 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
1459 if (regno
== parm_regs
[i
])
1464 /* Return if we do not know how to pass ARG solely in registers. */
1467 ix86_must_pass_in_stack (const function_arg_info
&arg
)
1469 if (must_pass_in_stack_var_size_or_pad (arg
))
1472 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1473 The layout_type routine is crafty and tries to trick us into passing
1474 currently unsupported vector types on the stack by using TImode. */
1475 return (!TARGET_64BIT
&& arg
.mode
== TImode
1476 && arg
.type
&& TREE_CODE (arg
.type
) != VECTOR_TYPE
);
1479 /* It returns the size, in bytes, of the area reserved for arguments passed
1480 in registers for the function represented by fndecl dependent to the used
1483 ix86_reg_parm_stack_space (const_tree fndecl
)
1485 enum calling_abi call_abi
= SYSV_ABI
;
1486 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
1487 call_abi
= ix86_function_abi (fndecl
);
1489 call_abi
= ix86_function_type_abi (fndecl
);
1490 if (TARGET_64BIT
&& call_abi
== MS_ABI
)
1495 /* We add this as a workaround in order to use libc_has_function
1498 ix86_libc_has_function (enum function_class fn_class
)
1500 return targetm
.libc_has_function (fn_class
, NULL_TREE
);
1503 /* Returns value SYSV_ABI, MS_ABI dependent on fntype,
1504 specifying the call abi used. */
1506 ix86_function_type_abi (const_tree fntype
)
1508 enum calling_abi abi
= ix86_abi
;
1510 if (fntype
== NULL_TREE
|| TYPE_ATTRIBUTES (fntype
) == NULL_TREE
)
1514 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
1517 if (TARGET_X32
&& !warned
)
1519 error ("X32 does not support %<ms_abi%> attribute");
1525 else if (abi
== MS_ABI
1526 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
1533 ix86_function_abi (const_tree fndecl
)
1535 return fndecl
? ix86_function_type_abi (TREE_TYPE (fndecl
)) : ix86_abi
;
1538 /* Returns value SYSV_ABI, MS_ABI dependent on cfun,
1539 specifying the call abi used. */
1541 ix86_cfun_abi (void)
1543 return cfun
? cfun
->machine
->call_abi
: ix86_abi
;
1547 ix86_function_ms_hook_prologue (const_tree fn
)
1549 if (fn
&& lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn
)))
1551 if (decl_function_context (fn
) != NULL_TREE
)
1552 error_at (DECL_SOURCE_LOCATION (fn
),
1553 "%<ms_hook_prologue%> attribute is not compatible "
1554 "with nested function");
1562 ix86_function_naked (const_tree fn
)
1564 if (fn
&& lookup_attribute ("naked", DECL_ATTRIBUTES (fn
)))
1570 /* Write the extra assembler code needed to declare a function properly. */
1573 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
1576 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
1579 cfun
->machine
->function_label_emitted
= true;
1583 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
1584 unsigned int filler_cc
= 0xcccccccc;
1586 for (i
= 0; i
< filler_count
; i
+= 4)
1587 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
1590 #ifdef SUBTARGET_ASM_UNWIND_INIT
1591 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
1594 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
1596 /* Output magic byte marker, if hot-patch attribute is set. */
1601 /* leaq [%rsp + 0], %rsp */
1602 fputs (ASM_BYTE
"0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
1607 /* movl.s %edi, %edi
1609 movl.s %esp, %ebp */
1610 fputs (ASM_BYTE
"0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file
);
1615 /* Implementation of call abi switching target hook. Specific to FNDECL
1616 the specific call register sets are set. See also
1617 ix86_conditional_register_usage for more details. */
1619 ix86_call_abi_override (const_tree fndecl
)
1621 cfun
->machine
->call_abi
= ix86_function_abi (fndecl
);
1624 /* Return 1 if pseudo register should be created and used to hold
1625 GOT address for PIC code. */
1627 ix86_use_pseudo_pic_reg (void)
1630 && (ix86_cmodel
== CM_SMALL_PIC
1637 /* Initialize large model PIC register. */
1640 ix86_init_large_pic_reg (unsigned int tmp_regno
)
1642 rtx_code_label
*label
;
1645 gcc_assert (Pmode
== DImode
);
1646 label
= gen_label_rtx ();
1648 LABEL_PRESERVE_P (label
) = 1;
1649 tmp_reg
= gen_rtx_REG (Pmode
, tmp_regno
);
1650 gcc_assert (REGNO (pic_offset_table_rtx
) != tmp_regno
);
1651 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
,
1653 emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
1654 emit_insn (gen_add2_insn (pic_offset_table_rtx
, tmp_reg
));
1655 const char *name
= LABEL_NAME (label
);
1656 PUT_CODE (label
, NOTE
);
1657 NOTE_KIND (label
) = NOTE_INSN_DELETED_LABEL
;
1658 NOTE_DELETED_LABEL_NAME (label
) = name
;
1661 /* Create and initialize PIC register if required. */
1663 ix86_init_pic_reg (void)
1668 if (!ix86_use_pseudo_pic_reg ())
1675 if (ix86_cmodel
== CM_LARGE_PIC
)
1676 ix86_init_large_pic_reg (R11_REG
);
1678 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
1682 /* If there is future mcount call in the function it is more profitable
1683 to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */
1684 rtx reg
= crtl
->profile
1685 ? gen_rtx_REG (Pmode
, REAL_PIC_OFFSET_TABLE_REGNUM
)
1686 : pic_offset_table_rtx
;
1687 rtx_insn
*insn
= emit_insn (gen_set_got (reg
));
1688 RTX_FRAME_RELATED_P (insn
) = 1;
1690 emit_move_insn (pic_offset_table_rtx
, reg
);
1691 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
1697 entry_edge
= single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
1698 insert_insn_on_edge (seq
, entry_edge
);
1699 commit_one_edge_insertion (entry_edge
);
1702 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1703 for a call to a function whose data type is FNTYPE.
1704 For a library call, FNTYPE is 0. */
1707 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
1708 tree fntype
, /* tree ptr for function decl */
1709 rtx libname
, /* SYMBOL_REF of library name or 0 */
1713 struct cgraph_node
*local_info_node
= NULL
;
1714 struct cgraph_node
*target
= NULL
;
1716 memset (cum
, 0, sizeof (*cum
));
1720 target
= cgraph_node::get (fndecl
);
1723 target
= target
->function_symbol ();
1724 local_info_node
= cgraph_node::local_info_node (target
->decl
);
1725 cum
->call_abi
= ix86_function_abi (target
->decl
);
1728 cum
->call_abi
= ix86_function_abi (fndecl
);
1731 cum
->call_abi
= ix86_function_type_abi (fntype
);
1733 cum
->caller
= caller
;
1735 /* Set up the number of registers to use for passing arguments. */
1736 cum
->nregs
= ix86_regparm
;
1739 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
1740 ? X86_64_REGPARM_MAX
1741 : X86_64_MS_REGPARM_MAX
);
1745 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1748 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
1749 ? X86_64_SSE_REGPARM_MAX
1750 : X86_64_MS_SSE_REGPARM_MAX
);
1754 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
1755 cum
->warn_avx512f
= true;
1756 cum
->warn_avx
= true;
1757 cum
->warn_sse
= true;
1758 cum
->warn_mmx
= true;
1760 /* Because type might mismatch in between caller and callee, we need to
1761 use actual type of function for local calls.
1762 FIXME: cgraph_analyze can be told to actually record if function uses
1763 va_start so for local functions maybe_vaarg can be made aggressive
1765 FIXME: once typesytem is fixed, we won't need this code anymore. */
1766 if (local_info_node
&& local_info_node
->local
1767 && local_info_node
->can_change_signature
)
1768 fntype
= TREE_TYPE (target
->decl
);
1769 cum
->stdarg
= stdarg_p (fntype
);
1770 cum
->maybe_vaarg
= (fntype
1771 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
1776 cum
->warn_empty
= !warn_abi
|| cum
->stdarg
;
1777 if (!cum
->warn_empty
&& fntype
)
1779 function_args_iterator iter
;
1781 bool seen_empty_type
= false;
1782 FOREACH_FUNCTION_ARGS (fntype
, argtype
, iter
)
1784 if (argtype
== error_mark_node
|| VOID_TYPE_P (argtype
))
1786 if (TYPE_EMPTY_P (argtype
))
1787 seen_empty_type
= true;
1788 else if (seen_empty_type
)
1790 cum
->warn_empty
= true;
1798 /* If there are variable arguments, then we won't pass anything
1799 in registers in 32-bit mode. */
1800 if (stdarg_p (fntype
))
1803 /* Since in 32-bit, variable arguments are always passed on
1804 stack, there is scratch register available for indirect
1806 cfun
->machine
->arg_reg_available
= true;
1809 cum
->warn_avx512f
= false;
1810 cum
->warn_avx
= false;
1811 cum
->warn_sse
= false;
1812 cum
->warn_mmx
= false;
1816 /* Use ecx and edx registers if function has fastcall attribute,
1817 else look for regparm information. */
1820 unsigned int ccvt
= ix86_get_callcvt (fntype
);
1821 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
1824 cum
->fastcall
= 1; /* Same first register as in fastcall. */
1826 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
1832 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
1835 /* Set up the number of SSE registers used for passing SFmode
1836 and DFmode arguments. Warn for mismatching ABI. */
1837 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
1840 cfun
->machine
->arg_reg_available
= (cum
->nregs
> 0);
1843 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
1844 But in the case of vector types, it is some vector mode.
1846 When we have only some of our vector isa extensions enabled, then there
1847 are some modes for which vector_mode_supported_p is false. For these
1848 modes, the generic vector support in gcc will choose some non-vector mode
1849 in order to implement the type. By computing the natural mode, we'll
1850 select the proper ABI location for the operand and not depend on whatever
1851 the middle-end decides to do with these vector types.
1853 The midde-end can't deal with the vector types > 16 bytes. In this
1854 case, we return the original mode and warn ABI change if CUM isn't
1857 If INT_RETURN is true, warn ABI change if the vector mode isn't
1858 available for function return value. */
1861 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
,
1864 machine_mode mode
= TYPE_MODE (type
);
1866 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
1868 HOST_WIDE_INT size
= int_size_in_bytes (type
);
1869 if ((size
== 8 || size
== 16 || size
== 32 || size
== 64)
1870 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
1871 && TYPE_VECTOR_SUBPARTS (type
) > 1)
1873 machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
1875 /* There are no XFmode vector modes. */
1876 if (innermode
== XFmode
)
1879 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
1880 mode
= MIN_MODE_VECTOR_FLOAT
;
1882 mode
= MIN_MODE_VECTOR_INT
;
1884 /* Get the mode which has this inner mode and number of units. */
1885 FOR_EACH_MODE_FROM (mode
, mode
)
1886 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
1887 && GET_MODE_INNER (mode
) == innermode
)
1889 if (size
== 64 && !TARGET_AVX512F
&& !TARGET_IAMCU
)
1891 static bool warnedavx512f
;
1892 static bool warnedavx512f_ret
;
1894 if (cum
&& cum
->warn_avx512f
&& !warnedavx512f
)
1896 if (warning (OPT_Wpsabi
, "AVX512F vector argument "
1897 "without AVX512F enabled changes the ABI"))
1898 warnedavx512f
= true;
1900 else if (in_return
&& !warnedavx512f_ret
)
1902 if (warning (OPT_Wpsabi
, "AVX512F vector return "
1903 "without AVX512F enabled changes the ABI"))
1904 warnedavx512f_ret
= true;
1907 return TYPE_MODE (type
);
1909 else if (size
== 32 && !TARGET_AVX
&& !TARGET_IAMCU
)
1911 static bool warnedavx
;
1912 static bool warnedavx_ret
;
1914 if (cum
&& cum
->warn_avx
&& !warnedavx
)
1916 if (warning (OPT_Wpsabi
, "AVX vector argument "
1917 "without AVX enabled changes the ABI"))
1920 else if (in_return
&& !warnedavx_ret
)
1922 if (warning (OPT_Wpsabi
, "AVX vector return "
1923 "without AVX enabled changes the ABI"))
1924 warnedavx_ret
= true;
1927 return TYPE_MODE (type
);
1929 else if (((size
== 8 && TARGET_64BIT
) || size
== 16)
1933 static bool warnedsse
;
1934 static bool warnedsse_ret
;
1936 if (cum
&& cum
->warn_sse
&& !warnedsse
)
1938 if (warning (OPT_Wpsabi
, "SSE vector argument "
1939 "without SSE enabled changes the ABI"))
1942 else if (!TARGET_64BIT
&& in_return
&& !warnedsse_ret
)
1944 if (warning (OPT_Wpsabi
, "SSE vector return "
1945 "without SSE enabled changes the ABI"))
1946 warnedsse_ret
= true;
1949 else if ((size
== 8 && !TARGET_64BIT
)
1951 || cfun
->machine
->func_type
== TYPE_NORMAL
)
1955 static bool warnedmmx
;
1956 static bool warnedmmx_ret
;
1958 if (cum
&& cum
->warn_mmx
&& !warnedmmx
)
1960 if (warning (OPT_Wpsabi
, "MMX vector argument "
1961 "without MMX enabled changes the ABI"))
1964 else if (in_return
&& !warnedmmx_ret
)
1966 if (warning (OPT_Wpsabi
, "MMX vector return "
1967 "without MMX enabled changes the ABI"))
1968 warnedmmx_ret
= true;
1981 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
1982 this may not agree with the mode that the type system has chosen for the
1983 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
1984 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
1987 gen_reg_or_parallel (machine_mode mode
, machine_mode orig_mode
,
1992 if (orig_mode
!= BLKmode
)
1993 tmp
= gen_rtx_REG (orig_mode
, regno
);
1996 tmp
= gen_rtx_REG (mode
, regno
);
1997 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
1998 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
2004 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2005 of this code is to classify each 8bytes of incoming argument by the register
2006 class and assign registers accordingly. */
2008 /* Return the union class of CLASS1 and CLASS2.
2009 See the x86-64 PS ABI for details. */
2011 static enum x86_64_reg_class
2012 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
2014 /* Rule #1: If both classes are equal, this is the resulting class. */
2015 if (class1
== class2
)
2018 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2020 if (class1
== X86_64_NO_CLASS
)
2022 if (class2
== X86_64_NO_CLASS
)
2025 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2026 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
2027 return X86_64_MEMORY_CLASS
;
2029 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2030 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
2031 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
2032 return X86_64_INTEGERSI_CLASS
;
2033 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
2034 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
2035 return X86_64_INTEGER_CLASS
;
2037 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2039 if (class1
== X86_64_X87_CLASS
2040 || class1
== X86_64_X87UP_CLASS
2041 || class1
== X86_64_COMPLEX_X87_CLASS
2042 || class2
== X86_64_X87_CLASS
2043 || class2
== X86_64_X87UP_CLASS
2044 || class2
== X86_64_COMPLEX_X87_CLASS
)
2045 return X86_64_MEMORY_CLASS
;
2047 /* Rule #6: Otherwise class SSE is used. */
2048 return X86_64_SSE_CLASS
;
2051 /* Classify the argument of type TYPE and mode MODE.
2052 CLASSES will be filled by the register class used to pass each word
2053 of the operand. The number of words is returned. In case the parameter
2054 should be passed in memory, 0 is returned. As a special case for zero
2055 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2057 BIT_OFFSET is used internally for handling records and specifies offset
2058 of the offset in bits modulo 512 to avoid overflow cases.
2060 See the x86-64 PS ABI for details.
2064 classify_argument (machine_mode mode
, const_tree type
,
2065 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
2068 = mode
== BLKmode
? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2069 int words
= CEIL (bytes
+ (bit_offset
% 64) / 8, UNITS_PER_WORD
);
2071 /* Variable sized entities are always passed/returned in memory. */
2075 if (mode
!= VOIDmode
)
2077 /* The value of "named" doesn't matter. */
2078 function_arg_info
arg (const_cast<tree
> (type
), mode
, /*named=*/true);
2079 if (targetm
.calls
.must_pass_in_stack (arg
))
2083 if (type
&& AGGREGATE_TYPE_P (type
))
2087 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
2089 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2093 for (i
= 0; i
< words
; i
++)
2094 classes
[i
] = X86_64_NO_CLASS
;
2096 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2097 signalize memory class, so handle it as special case. */
2100 classes
[0] = X86_64_NO_CLASS
;
2104 /* Classify each field of record and merge classes. */
2105 switch (TREE_CODE (type
))
2108 /* And now merge the fields of structure. */
2109 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
2111 if (TREE_CODE (field
) == FIELD_DECL
)
2115 if (TREE_TYPE (field
) == error_mark_node
)
2118 /* Bitfields are always classified as integer. Handle them
2119 early, since later code would consider them to be
2120 misaligned integers. */
2121 if (DECL_BIT_FIELD (field
))
2123 for (i
= (int_bit_position (field
)
2124 + (bit_offset
% 64)) / 8 / 8;
2125 i
< ((int_bit_position (field
) + (bit_offset
% 64))
2126 + tree_to_shwi (DECL_SIZE (field
))
2129 = merge_classes (X86_64_INTEGER_CLASS
, classes
[i
]);
2135 type
= TREE_TYPE (field
);
2137 /* Flexible array member is ignored. */
2138 if (TYPE_MODE (type
) == BLKmode
2139 && TREE_CODE (type
) == ARRAY_TYPE
2140 && TYPE_SIZE (type
) == NULL_TREE
2141 && TYPE_DOMAIN (type
) != NULL_TREE
2142 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
2147 if (!warned
&& warn_psabi
)
2150 inform (input_location
,
2151 "the ABI of passing struct with"
2152 " a flexible array member has"
2153 " changed in GCC 4.4");
2157 num
= classify_argument (TYPE_MODE (type
), type
,
2159 (int_bit_position (field
)
2160 + bit_offset
) % 512);
2163 pos
= (int_bit_position (field
)
2164 + (bit_offset
% 64)) / 8 / 8;
2165 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
2167 = merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2174 /* Arrays are handled as small records. */
2177 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
2178 TREE_TYPE (type
), subclasses
, bit_offset
);
2182 /* The partial classes are now full classes. */
2183 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
2184 subclasses
[0] = X86_64_SSE_CLASS
;
2185 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
2186 && !((bit_offset
% 64) == 0 && bytes
== 4))
2187 subclasses
[0] = X86_64_INTEGER_CLASS
;
2189 for (i
= 0; i
< words
; i
++)
2190 classes
[i
] = subclasses
[i
% num
];
2195 case QUAL_UNION_TYPE
:
2196 /* Unions are similar to RECORD_TYPE but offset is always 0.
2198 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
2200 if (TREE_CODE (field
) == FIELD_DECL
)
2204 if (TREE_TYPE (field
) == error_mark_node
)
2207 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2208 TREE_TYPE (field
), subclasses
,
2212 for (i
= 0; i
< num
&& i
< words
; i
++)
2213 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
2224 /* When size > 16 bytes, if the first one isn't
2225 X86_64_SSE_CLASS or any other ones aren't
2226 X86_64_SSEUP_CLASS, everything should be passed in
2228 if (classes
[0] != X86_64_SSE_CLASS
)
2231 for (i
= 1; i
< words
; i
++)
2232 if (classes
[i
] != X86_64_SSEUP_CLASS
)
2236 /* Final merger cleanup. */
2237 for (i
= 0; i
< words
; i
++)
2239 /* If one class is MEMORY, everything should be passed in
2241 if (classes
[i
] == X86_64_MEMORY_CLASS
)
2244 /* The X86_64_SSEUP_CLASS should be always preceded by
2245 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2246 if (classes
[i
] == X86_64_SSEUP_CLASS
2247 && classes
[i
- 1] != X86_64_SSE_CLASS
2248 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
2250 /* The first one should never be X86_64_SSEUP_CLASS. */
2251 gcc_assert (i
!= 0);
2252 classes
[i
] = X86_64_SSE_CLASS
;
2255 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2256 everything should be passed in memory. */
2257 if (classes
[i
] == X86_64_X87UP_CLASS
2258 && (classes
[i
- 1] != X86_64_X87_CLASS
))
2262 /* The first one should never be X86_64_X87UP_CLASS. */
2263 gcc_assert (i
!= 0);
2264 if (!warned
&& warn_psabi
)
2267 inform (input_location
,
2268 "the ABI of passing union with %<long double%>"
2269 " has changed in GCC 4.4");
2277 /* Compute alignment needed. We align all types to natural boundaries with
2278 exception of XFmode that is aligned to 64bits. */
2279 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
2281 int mode_alignment
= GET_MODE_BITSIZE (mode
);
2284 mode_alignment
= 128;
2285 else if (mode
== XCmode
)
2286 mode_alignment
= 256;
2287 if (COMPLEX_MODE_P (mode
))
2288 mode_alignment
/= 2;
2289 /* Misaligned fields are always returned in memory. */
2290 if (bit_offset
% mode_alignment
)
2294 /* for V1xx modes, just use the base mode */
2295 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
2296 && GET_MODE_UNIT_SIZE (mode
) == bytes
)
2297 mode
= GET_MODE_INNER (mode
);
2299 /* Classification of atomic types. */
2304 classes
[0] = X86_64_SSE_CLASS
;
2307 classes
[0] = X86_64_SSE_CLASS
;
2308 classes
[1] = X86_64_SSEUP_CLASS
;
2318 int size
= bit_offset
+ (int) GET_MODE_BITSIZE (mode
);
2320 /* Analyze last 128 bits only. */
2321 size
= (size
- 1) & 0x7f;
2325 classes
[0] = X86_64_INTEGERSI_CLASS
;
2330 classes
[0] = X86_64_INTEGER_CLASS
;
2333 else if (size
< 64+32)
2335 classes
[0] = X86_64_INTEGER_CLASS
;
2336 classes
[1] = X86_64_INTEGERSI_CLASS
;
2339 else if (size
< 64+64)
2341 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2349 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2353 /* OImode shouldn't be used directly. */
2358 if (!(bit_offset
% 64))
2359 classes
[0] = X86_64_SSESF_CLASS
;
2361 classes
[0] = X86_64_SSE_CLASS
;
2364 classes
[0] = X86_64_SSEDF_CLASS
;
2367 classes
[0] = X86_64_X87_CLASS
;
2368 classes
[1] = X86_64_X87UP_CLASS
;
2371 classes
[0] = X86_64_SSE_CLASS
;
2372 classes
[1] = X86_64_SSEUP_CLASS
;
2375 classes
[0] = X86_64_SSE_CLASS
;
2376 if (!(bit_offset
% 64))
2382 if (!warned
&& warn_psabi
)
2385 inform (input_location
,
2386 "the ABI of passing structure with %<complex float%>"
2387 " member has changed in GCC 4.4");
2389 classes
[1] = X86_64_SSESF_CLASS
;
2393 classes
[0] = X86_64_SSEDF_CLASS
;
2394 classes
[1] = X86_64_SSEDF_CLASS
;
2397 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
2400 /* This modes is larger than 16 bytes. */
2408 classes
[0] = X86_64_SSE_CLASS
;
2409 classes
[1] = X86_64_SSEUP_CLASS
;
2410 classes
[2] = X86_64_SSEUP_CLASS
;
2411 classes
[3] = X86_64_SSEUP_CLASS
;
2419 classes
[0] = X86_64_SSE_CLASS
;
2420 classes
[1] = X86_64_SSEUP_CLASS
;
2421 classes
[2] = X86_64_SSEUP_CLASS
;
2422 classes
[3] = X86_64_SSEUP_CLASS
;
2423 classes
[4] = X86_64_SSEUP_CLASS
;
2424 classes
[5] = X86_64_SSEUP_CLASS
;
2425 classes
[6] = X86_64_SSEUP_CLASS
;
2426 classes
[7] = X86_64_SSEUP_CLASS
;
2434 classes
[0] = X86_64_SSE_CLASS
;
2435 classes
[1] = X86_64_SSEUP_CLASS
;
2443 classes
[0] = X86_64_SSE_CLASS
;
2449 gcc_assert (VECTOR_MODE_P (mode
));
2454 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
2456 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2457 classes
[0] = X86_64_INTEGERSI_CLASS
;
2459 classes
[0] = X86_64_INTEGER_CLASS
;
2460 classes
[1] = X86_64_INTEGER_CLASS
;
2461 return 1 + (bytes
> 8);
2465 /* Examine the argument and return set number of register required in each
2466 class. Return true iff parameter should be passed in memory. */
2469 examine_argument (machine_mode mode
, const_tree type
, int in_return
,
2470 int *int_nregs
, int *sse_nregs
)
2472 enum x86_64_reg_class regclass
[MAX_CLASSES
];
2473 int n
= classify_argument (mode
, type
, regclass
, 0);
2480 for (n
--; n
>= 0; n
--)
2481 switch (regclass
[n
])
2483 case X86_64_INTEGER_CLASS
:
2484 case X86_64_INTEGERSI_CLASS
:
2487 case X86_64_SSE_CLASS
:
2488 case X86_64_SSESF_CLASS
:
2489 case X86_64_SSEDF_CLASS
:
2492 case X86_64_NO_CLASS
:
2493 case X86_64_SSEUP_CLASS
:
2495 case X86_64_X87_CLASS
:
2496 case X86_64_X87UP_CLASS
:
2497 case X86_64_COMPLEX_X87_CLASS
:
2501 case X86_64_MEMORY_CLASS
:
2508 /* Construct container for the argument used by GCC interface. See
2509 FUNCTION_ARG for the detailed description. */
2512 construct_container (machine_mode mode
, machine_mode orig_mode
,
2513 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
2514 const int *intreg
, int sse_regno
)
2516 /* The following variables hold the static issued_error state. */
2517 static bool issued_sse_arg_error
;
2518 static bool issued_sse_ret_error
;
2519 static bool issued_x87_ret_error
;
2521 machine_mode tmpmode
;
2523 = mode
== BLKmode
? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2524 enum x86_64_reg_class regclass
[MAX_CLASSES
];
2528 int needed_sseregs
, needed_intregs
;
2529 rtx exp
[MAX_CLASSES
];
2532 n
= classify_argument (mode
, type
, regclass
, 0);
2535 if (examine_argument (mode
, type
, in_return
, &needed_intregs
,
2538 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
2541 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2542 some less clueful developer tries to use floating-point anyway. */
2543 if (needed_sseregs
&& !TARGET_SSE
)
2547 if (!issued_sse_ret_error
)
2549 error ("SSE register return with SSE disabled");
2550 issued_sse_ret_error
= true;
2553 else if (!issued_sse_arg_error
)
2555 error ("SSE register argument with SSE disabled");
2556 issued_sse_arg_error
= true;
2561 /* Likewise, error if the ABI requires us to return values in the
2562 x87 registers and the user specified -mno-80387. */
2563 if (!TARGET_FLOAT_RETURNS_IN_80387
&& in_return
)
2564 for (i
= 0; i
< n
; i
++)
2565 if (regclass
[i
] == X86_64_X87_CLASS
2566 || regclass
[i
] == X86_64_X87UP_CLASS
2567 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
2569 if (!issued_x87_ret_error
)
2571 error ("x87 register return with x87 disabled");
2572 issued_x87_ret_error
= true;
2577 /* First construct simple cases. Avoid SCmode, since we want to use
2578 single register to pass this type. */
2579 if (n
== 1 && mode
!= SCmode
)
2580 switch (regclass
[0])
2582 case X86_64_INTEGER_CLASS
:
2583 case X86_64_INTEGERSI_CLASS
:
2584 return gen_rtx_REG (mode
, intreg
[0]);
2585 case X86_64_SSE_CLASS
:
2586 case X86_64_SSESF_CLASS
:
2587 case X86_64_SSEDF_CLASS
:
2588 if (mode
!= BLKmode
)
2589 return gen_reg_or_parallel (mode
, orig_mode
,
2590 GET_SSE_REGNO (sse_regno
));
2592 case X86_64_X87_CLASS
:
2593 case X86_64_COMPLEX_X87_CLASS
:
2594 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
2595 case X86_64_NO_CLASS
:
2596 /* Zero sized array, struct or class. */
2602 && regclass
[0] == X86_64_SSE_CLASS
2603 && regclass
[1] == X86_64_SSEUP_CLASS
2605 return gen_reg_or_parallel (mode
, orig_mode
,
2606 GET_SSE_REGNO (sse_regno
));
2608 && regclass
[0] == X86_64_SSE_CLASS
2609 && regclass
[1] == X86_64_SSEUP_CLASS
2610 && regclass
[2] == X86_64_SSEUP_CLASS
2611 && regclass
[3] == X86_64_SSEUP_CLASS
2613 return gen_reg_or_parallel (mode
, orig_mode
,
2614 GET_SSE_REGNO (sse_regno
));
2616 && regclass
[0] == X86_64_SSE_CLASS
2617 && regclass
[1] == X86_64_SSEUP_CLASS
2618 && regclass
[2] == X86_64_SSEUP_CLASS
2619 && regclass
[3] == X86_64_SSEUP_CLASS
2620 && regclass
[4] == X86_64_SSEUP_CLASS
2621 && regclass
[5] == X86_64_SSEUP_CLASS
2622 && regclass
[6] == X86_64_SSEUP_CLASS
2623 && regclass
[7] == X86_64_SSEUP_CLASS
2625 return gen_reg_or_parallel (mode
, orig_mode
,
2626 GET_SSE_REGNO (sse_regno
));
2628 && regclass
[0] == X86_64_X87_CLASS
2629 && regclass
[1] == X86_64_X87UP_CLASS
)
2630 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
2633 && regclass
[0] == X86_64_INTEGER_CLASS
2634 && regclass
[1] == X86_64_INTEGER_CLASS
2635 && (mode
== CDImode
|| mode
== TImode
|| mode
== BLKmode
)
2636 && intreg
[0] + 1 == intreg
[1])
2638 if (mode
== BLKmode
)
2640 /* Use TImode for BLKmode values in 2 integer registers. */
2641 exp
[0] = gen_rtx_EXPR_LIST (VOIDmode
,
2642 gen_rtx_REG (TImode
, intreg
[0]),
2644 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (1));
2645 XVECEXP (ret
, 0, 0) = exp
[0];
2649 return gen_rtx_REG (mode
, intreg
[0]);
2652 /* Otherwise figure out the entries of the PARALLEL. */
2653 for (i
= 0; i
< n
; i
++)
2657 switch (regclass
[i
])
2659 case X86_64_NO_CLASS
:
2661 case X86_64_INTEGER_CLASS
:
2662 case X86_64_INTEGERSI_CLASS
:
2663 /* Merge TImodes on aligned occasions here too. */
2664 if (i
* 8 + 8 > bytes
)
2666 unsigned int tmpbits
= (bytes
- i
* 8) * BITS_PER_UNIT
;
2667 if (!int_mode_for_size (tmpbits
, 0).exists (&tmpmode
))
2668 /* We've requested 24 bytes we
2669 don't have mode for. Use DImode. */
2672 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
2677 = gen_rtx_EXPR_LIST (VOIDmode
,
2678 gen_rtx_REG (tmpmode
, *intreg
),
2682 case X86_64_SSESF_CLASS
:
2684 = gen_rtx_EXPR_LIST (VOIDmode
,
2685 gen_rtx_REG (SFmode
,
2686 GET_SSE_REGNO (sse_regno
)),
2690 case X86_64_SSEDF_CLASS
:
2692 = gen_rtx_EXPR_LIST (VOIDmode
,
2693 gen_rtx_REG (DFmode
,
2694 GET_SSE_REGNO (sse_regno
)),
2698 case X86_64_SSE_CLASS
:
2706 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
2716 && regclass
[1] == X86_64_SSEUP_CLASS
2717 && regclass
[2] == X86_64_SSEUP_CLASS
2718 && regclass
[3] == X86_64_SSEUP_CLASS
);
2724 && regclass
[1] == X86_64_SSEUP_CLASS
2725 && regclass
[2] == X86_64_SSEUP_CLASS
2726 && regclass
[3] == X86_64_SSEUP_CLASS
2727 && regclass
[4] == X86_64_SSEUP_CLASS
2728 && regclass
[5] == X86_64_SSEUP_CLASS
2729 && regclass
[6] == X86_64_SSEUP_CLASS
2730 && regclass
[7] == X86_64_SSEUP_CLASS
);
2738 = gen_rtx_EXPR_LIST (VOIDmode
,
2739 gen_rtx_REG (tmpmode
,
2740 GET_SSE_REGNO (sse_regno
)),
2749 /* Empty aligned struct, union or class. */
2753 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2754 for (i
= 0; i
< nexps
; i
++)
2755 XVECEXP (ret
, 0, i
) = exp
[i
];
2759 /* Update the data in CUM to advance over an argument of mode MODE
2760 and data type TYPE. (TYPE is null for libcalls where that information
2761 may not be available.)
2763 Return a number of integer regsiters advanced over. */
2766 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, machine_mode mode
,
2767 const_tree type
, HOST_WIDE_INT bytes
,
2768 HOST_WIDE_INT words
)
2771 bool error_p
= false;
2775 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2776 bytes in registers. */
2777 if (!VECTOR_MODE_P (mode
) && bytes
<= 8)
2797 cum
->words
+= words
;
2798 cum
->nregs
-= words
;
2799 cum
->regno
+= words
;
2800 if (cum
->nregs
>= 0)
2802 if (cum
->nregs
<= 0)
2805 cfun
->machine
->arg_reg_available
= false;
2811 /* OImode shouldn't be used directly. */
2815 if (cum
->float_in_sse
== -1)
2817 if (cum
->float_in_sse
< 2)
2821 if (cum
->float_in_sse
== -1)
2823 if (cum
->float_in_sse
< 1)
2846 if (!type
|| !AGGREGATE_TYPE_P (type
))
2848 cum
->sse_words
+= words
;
2849 cum
->sse_nregs
-= 1;
2850 cum
->sse_regno
+= 1;
2851 if (cum
->sse_nregs
<= 0)
2865 if (!type
|| !AGGREGATE_TYPE_P (type
))
2867 cum
->mmx_words
+= words
;
2868 cum
->mmx_nregs
-= 1;
2869 cum
->mmx_regno
+= 1;
2870 if (cum
->mmx_nregs
<= 0)
2880 cum
->float_in_sse
= 0;
2881 error ("calling %qD with SSE calling convention without "
2882 "SSE/SSE2 enabled", cum
->decl
);
2883 sorry ("this is a GCC bug that can be worked around by adding "
2884 "attribute used to function called");
2891 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, machine_mode mode
,
2892 const_tree type
, HOST_WIDE_INT words
, bool named
)
2894 int int_nregs
, sse_nregs
;
2896 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
2897 if (!named
&& (VALID_AVX512F_REG_MODE (mode
)
2898 || VALID_AVX256_REG_MODE (mode
)))
2901 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
2902 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2904 cum
->nregs
-= int_nregs
;
2905 cum
->sse_nregs
-= sse_nregs
;
2906 cum
->regno
+= int_nregs
;
2907 cum
->sse_regno
+= sse_nregs
;
2912 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
2913 cum
->words
= ROUND_UP (cum
->words
, align
);
2914 cum
->words
+= words
;
2920 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
2921 HOST_WIDE_INT words
)
2923 /* Otherwise, this should be passed indirect. */
2924 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
2926 cum
->words
+= words
;
2936 /* Update the data in CUM to advance over argument ARG. */
2939 ix86_function_arg_advance (cumulative_args_t cum_v
,
2940 const function_arg_info
&arg
)
2942 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
2943 machine_mode mode
= arg
.mode
;
2944 HOST_WIDE_INT bytes
, words
;
2947 /* The argument of interrupt handler is a special case and is
2948 handled in ix86_function_arg. */
2949 if (!cum
->caller
&& cfun
->machine
->func_type
!= TYPE_NORMAL
)
2952 bytes
= arg
.promoted_size_in_bytes ();
2953 words
= CEIL (bytes
, UNITS_PER_WORD
);
2956 mode
= type_natural_mode (arg
.type
, NULL
, false);
2960 enum calling_abi call_abi
= cum
? cum
->call_abi
: ix86_abi
;
2962 if (call_abi
== MS_ABI
)
2963 nregs
= function_arg_advance_ms_64 (cum
, bytes
, words
);
2965 nregs
= function_arg_advance_64 (cum
, mode
, arg
.type
, words
,
2969 nregs
= function_arg_advance_32 (cum
, mode
, arg
.type
, bytes
, words
);
2973 /* Track if there are outgoing arguments on stack. */
2975 cfun
->machine
->outgoing_args_on_stack
= true;
2979 /* Define where to put the arguments to a function.
2980 Value is zero to push the argument on the stack,
2981 or a hard register in which to store the argument.
2983 MODE is the argument's machine mode.
2984 TYPE is the data type of the argument (as a tree).
2985 This is null for libcalls where that information may
2987 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2988 the preceding args and about the function being called.
2989 NAMED is nonzero if this argument is a named parameter
2990 (otherwise it is an extra parameter matching an ellipsis). */
2993 function_arg_32 (CUMULATIVE_ARGS
*cum
, machine_mode mode
,
2994 machine_mode orig_mode
, const_tree type
,
2995 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
2997 bool error_p
= false;
2999 /* Avoid the AL settings for the Unix64 ABI. */
3000 if (mode
== VOIDmode
)
3005 /* Intel MCU psABI passes scalars and aggregates no larger than 8
3006 bytes in registers. */
3007 if (!VECTOR_MODE_P (mode
) && bytes
<= 8)
3026 if (words
<= cum
->nregs
)
3028 int regno
= cum
->regno
;
3030 /* Fastcall allocates the first two DWORD (SImode) or
3031 smaller arguments to ECX and EDX if it isn't an
3037 || (type
&& AGGREGATE_TYPE_P (type
)))
3040 /* ECX not EAX is the first allocated register. */
3041 if (regno
== AX_REG
)
3044 return gen_rtx_REG (mode
, regno
);
3049 if (cum
->float_in_sse
== -1)
3051 if (cum
->float_in_sse
< 2)
3055 if (cum
->float_in_sse
== -1)
3057 if (cum
->float_in_sse
< 1)
3061 /* In 32bit, we pass TImode in xmm registers. */
3068 if (!type
|| !AGGREGATE_TYPE_P (type
))
3071 return gen_reg_or_parallel (mode
, orig_mode
,
3072 cum
->sse_regno
+ FIRST_SSE_REG
);
3078 /* OImode and XImode shouldn't be used directly. */
3093 if (!type
|| !AGGREGATE_TYPE_P (type
))
3096 return gen_reg_or_parallel (mode
, orig_mode
,
3097 cum
->sse_regno
+ FIRST_SSE_REG
);
3107 if (!type
|| !AGGREGATE_TYPE_P (type
))
3110 return gen_reg_or_parallel (mode
, orig_mode
,
3111 cum
->mmx_regno
+ FIRST_MMX_REG
);
3117 cum
->float_in_sse
= 0;
3118 error ("calling %qD with SSE calling convention without "
3119 "SSE/SSE2 enabled", cum
->decl
);
3120 sorry ("this is a GCC bug that can be worked around by adding "
3121 "attribute used to function called");
3128 function_arg_64 (const CUMULATIVE_ARGS
*cum
, machine_mode mode
,
3129 machine_mode orig_mode
, const_tree type
, bool named
)
3131 /* Handle a hidden AL argument containing number of registers
3132 for varargs x86-64 functions. */
3133 if (mode
== VOIDmode
)
3134 return GEN_INT (cum
->maybe_vaarg
3135 ? (cum
->sse_nregs
< 0
3136 ? X86_64_SSE_REGPARM_MAX
3157 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
3163 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
3165 &x86_64_int_parameter_registers
[cum
->regno
],
3170 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, machine_mode mode
,
3171 machine_mode orig_mode
, bool named
, const_tree type
,
3172 HOST_WIDE_INT bytes
)
3176 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
3177 We use value of -2 to specify that current function call is MSABI. */
3178 if (mode
== VOIDmode
)
3179 return GEN_INT (-2);
3181 /* If we've run out of registers, it goes on the stack. */
3182 if (cum
->nregs
== 0)
3185 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
3187 /* Only floating point modes are passed in anything but integer regs. */
3188 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
3192 if (type
== NULL_TREE
|| !AGGREGATE_TYPE_P (type
))
3193 regno
= cum
->regno
+ FIRST_SSE_REG
;
3199 /* Unnamed floating parameters are passed in both the
3200 SSE and integer registers. */
3201 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
3202 t2
= gen_rtx_REG (mode
, regno
);
3203 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
3204 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
3205 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
3208 /* Handle aggregated types passed in register. */
3209 if (orig_mode
== BLKmode
)
3211 if (bytes
> 0 && bytes
<= 8)
3212 mode
= (bytes
> 4 ? DImode
: SImode
);
3213 if (mode
== BLKmode
)
3217 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
3220 /* Return where to put the arguments to a function.
3221 Return zero to push the argument on the stack, or a hard register in which to store the argument.
3223 ARG describes the argument while CUM gives information about the
3224 preceding args and about the function being called. */
3227 ix86_function_arg (cumulative_args_t cum_v
, const function_arg_info
&arg
)
3229 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3230 machine_mode mode
= arg
.mode
;
3231 HOST_WIDE_INT bytes
, words
;
3234 if (!cum
->caller
&& cfun
->machine
->func_type
!= TYPE_NORMAL
)
3236 gcc_assert (arg
.type
!= NULL_TREE
);
3237 if (POINTER_TYPE_P (arg
.type
))
3239 /* This is the pointer argument. */
3240 gcc_assert (TYPE_MODE (arg
.type
) == Pmode
);
3241 /* It is at -WORD(AP) in the current frame in interrupt and
3242 exception handlers. */
3243 reg
= plus_constant (Pmode
, arg_pointer_rtx
, -UNITS_PER_WORD
);
3247 gcc_assert (cfun
->machine
->func_type
== TYPE_EXCEPTION
3248 && TREE_CODE (arg
.type
) == INTEGER_TYPE
3249 && TYPE_MODE (arg
.type
) == word_mode
);
3250 /* The error code is the word-mode integer argument at
3251 -2 * WORD(AP) in the current frame of the exception
3253 reg
= gen_rtx_MEM (word_mode
,
3254 plus_constant (Pmode
,
3256 -2 * UNITS_PER_WORD
));
3261 bytes
= arg
.promoted_size_in_bytes ();
3262 words
= CEIL (bytes
, UNITS_PER_WORD
);
3264 /* To simplify the code below, represent vector types with a vector mode
3265 even if MMX/SSE are not active. */
3266 if (arg
.type
&& TREE_CODE (arg
.type
) == VECTOR_TYPE
)
3267 mode
= type_natural_mode (arg
.type
, cum
, false);
3271 enum calling_abi call_abi
= cum
? cum
->call_abi
: ix86_abi
;
3273 if (call_abi
== MS_ABI
)
3274 reg
= function_arg_ms_64 (cum
, mode
, arg
.mode
, arg
.named
,
3277 reg
= function_arg_64 (cum
, mode
, arg
.mode
, arg
.type
, arg
.named
);
3280 reg
= function_arg_32 (cum
, mode
, arg
.mode
, arg
.type
, bytes
, words
);
3282 /* Track if there are outgoing arguments on stack. */
3283 if (reg
== NULL_RTX
&& cum
->caller
)
3284 cfun
->machine
->outgoing_args_on_stack
= true;
3289 /* A C expression that indicates when an argument must be passed by
3290 reference. If nonzero for an argument, a copy of that argument is
3291 made in memory and a pointer to the argument is passed instead of
3292 the argument itself. The pointer is passed in whatever way is
3293 appropriate for passing a pointer to that type. */
3296 ix86_pass_by_reference (cumulative_args_t cum_v
, const function_arg_info
&arg
)
3298 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3302 enum calling_abi call_abi
= cum
? cum
->call_abi
: ix86_abi
;
3304 /* See Windows x64 Software Convention. */
3305 if (call_abi
== MS_ABI
)
3307 HOST_WIDE_INT msize
= GET_MODE_SIZE (arg
.mode
);
3309 if (tree type
= arg
.type
)
3311 /* Arrays are passed by reference. */
3312 if (TREE_CODE (type
) == ARRAY_TYPE
)
3315 if (RECORD_OR_UNION_TYPE_P (type
))
3317 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3318 are passed by reference. */
3319 msize
= int_size_in_bytes (type
);
3323 /* __m128 is passed by reference. */
3324 return msize
!= 1 && msize
!= 2 && msize
!= 4 && msize
!= 8;
3326 else if (arg
.type
&& int_size_in_bytes (arg
.type
) == -1)
3333 /* Return true when TYPE should be 128bit aligned for 32bit argument
3334 passing ABI. XXX: This function is obsolete and is only used for
3335 checking psABI compatibility with previous versions of GCC. */
3338 ix86_compat_aligned_value_p (const_tree type
)
3340 machine_mode mode
= TYPE_MODE (type
);
3341 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
3345 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
3347 if (TYPE_ALIGN (type
) < 128)
3350 if (AGGREGATE_TYPE_P (type
))
3352 /* Walk the aggregates recursively. */
3353 switch (TREE_CODE (type
))
3357 case QUAL_UNION_TYPE
:
3361 /* Walk all the structure fields. */
3362 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
3364 if (TREE_CODE (field
) == FIELD_DECL
3365 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
3372 /* Just for use if some languages passes arrays by value. */
3373 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
3384 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3385 XXX: This function is obsolete and is only used for checking psABI
3386 compatibility with previous versions of GCC. */
3389 ix86_compat_function_arg_boundary (machine_mode mode
,
3390 const_tree type
, unsigned int align
)
3392 /* In 32bit, only _Decimal128 and __float128 are aligned to their
3393 natural boundaries. */
3394 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
3396 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3397 make an exception for SSE modes since these require 128bit
3400 The handling here differs from field_alignment. ICC aligns MMX
3401 arguments to 4 byte boundaries, while structure fields are aligned
3402 to 8 byte boundaries. */
3405 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
3406 align
= PARM_BOUNDARY
;
3410 if (!ix86_compat_aligned_value_p (type
))
3411 align
= PARM_BOUNDARY
;
3414 if (align
> BIGGEST_ALIGNMENT
)
3415 align
= BIGGEST_ALIGNMENT
;
3419 /* Return true when TYPE should be 128bit aligned for 32bit argument
3423 ix86_contains_aligned_value_p (const_tree type
)
3425 machine_mode mode
= TYPE_MODE (type
);
3427 if (mode
== XFmode
|| mode
== XCmode
)
3430 if (TYPE_ALIGN (type
) < 128)
3433 if (AGGREGATE_TYPE_P (type
))
3435 /* Walk the aggregates recursively. */
3436 switch (TREE_CODE (type
))
3440 case QUAL_UNION_TYPE
:
3444 /* Walk all the structure fields. */
3445 for (field
= TYPE_FIELDS (type
);
3447 field
= DECL_CHAIN (field
))
3449 if (TREE_CODE (field
) == FIELD_DECL
3450 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
3457 /* Just for use if some languages passes arrays by value. */
3458 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
3467 return TYPE_ALIGN (type
) >= 128;
3472 /* Gives the alignment boundary, in bits, of an argument with the
3473 specified mode and type. */
3476 ix86_function_arg_boundary (machine_mode mode
, const_tree type
)
3481 /* Since the main variant type is used for call, we convert it to
3482 the main variant type. */
3483 type
= TYPE_MAIN_VARIANT (type
);
3484 align
= TYPE_ALIGN (type
);
3485 if (TYPE_EMPTY_P (type
))
3486 return PARM_BOUNDARY
;
3489 align
= GET_MODE_ALIGNMENT (mode
);
3490 if (align
< PARM_BOUNDARY
)
3491 align
= PARM_BOUNDARY
;
3495 unsigned int saved_align
= align
;
3499 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
3502 if (mode
== XFmode
|| mode
== XCmode
)
3503 align
= PARM_BOUNDARY
;
3505 else if (!ix86_contains_aligned_value_p (type
))
3506 align
= PARM_BOUNDARY
;
3509 align
= PARM_BOUNDARY
;
3514 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
3518 inform (input_location
,
3519 "the ABI for passing parameters with %d-byte"
3520 " alignment has changed in GCC 4.6",
3521 align
/ BITS_PER_UNIT
);
3528 /* Return true if N is a possible register number of function value. */
3531 ix86_function_value_regno_p (const unsigned int regno
)
3538 return (!TARGET_64BIT
|| ix86_cfun_abi () != MS_ABI
);
3541 return TARGET_64BIT
&& ix86_cfun_abi () != MS_ABI
;
3543 /* Complex values are returned in %st(0)/%st(1) pair. */
3546 /* TODO: The function should depend on current function ABI but
3547 builtins.c would need updating then. Therefore we use the
3549 if (TARGET_64BIT
&& ix86_cfun_abi () == MS_ABI
)
3551 return TARGET_FLOAT_RETURNS_IN_80387
;
3553 /* Complex values are returned in %xmm0/%xmm1 pair. */
3559 if (TARGET_MACHO
|| TARGET_64BIT
)
3567 /* Check whether the register REGNO should be zeroed on X86.
3568 When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
3569 together, no need to zero it again.
3570 When NEED_ZERO_MMX is true, MMX registers should be cleared. */
3573 zero_call_used_regno_p (const unsigned int regno
,
3574 bool all_sse_zeroed
,
3577 return GENERAL_REGNO_P (regno
)
3578 || (!all_sse_zeroed
&& SSE_REGNO_P (regno
))
3579 || MASK_REGNO_P (regno
)
3580 || (need_zero_mmx
&& MMX_REGNO_P (regno
));
3583 /* Return the machine_mode that is used to zero register REGNO. */
3586 zero_call_used_regno_mode (const unsigned int regno
)
3588 /* NB: We only need to zero the lower 32 bits for integer registers
3589 and the lower 128 bits for vector registers since destination are
3590 zero-extended to the full register width. */
3591 if (GENERAL_REGNO_P (regno
))
3593 else if (SSE_REGNO_P (regno
))
3595 else if (MASK_REGNO_P (regno
))
3597 else if (MMX_REGNO_P (regno
))
3603 /* Generate a rtx to zero all vector registers together if possible,
3604 otherwise, return NULL. */
3607 zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs
)
3612 for (unsigned int regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
3613 if ((IN_RANGE (regno
, FIRST_SSE_REG
, LAST_SSE_REG
)
3615 && (REX_SSE_REGNO_P (regno
)
3616 || (TARGET_AVX512F
&& EXT_REX_SSE_REGNO_P (regno
)))))
3617 && !TEST_HARD_REG_BIT (need_zeroed_hardregs
, regno
))
3620 return gen_avx_vzeroall ();
3623 /* Generate insns to zero all st registers together.
3624 Return true when zeroing instructions are generated.
3625 Assume the number of st registers that are zeroed is num_of_st,
3626 we will emit the following sequence to zero them together:
3635 i.e., num_of_st fldz followed by num_of_st fstp to clear the stack
3636 mark stack slots empty.
3638 How to compute the num_of_st:
3639 There is no direct mapping from stack registers to hard register
3640 numbers. If one stack register needs to be cleared, we don't know
3641 where in the stack the value remains. So, if any stack register
3642 needs to be cleared, the whole stack should be cleared. However,
3643 x87 stack registers that hold the return value should be excluded.
3644 x87 returns in the top (two for complex values) register, so
3645 num_of_st should be 7/6 when x87 returns, otherwise it will be 8. */
3649 zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs
)
3651 unsigned int num_of_st
= 0;
3652 for (unsigned int regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
3653 if ((STACK_REGNO_P (regno
) || MMX_REGNO_P (regno
))
3654 && TEST_HARD_REG_BIT (need_zeroed_hardregs
, regno
))
3663 bool return_with_x87
= false;
3664 return_with_x87
= (crtl
->return_rtx
3665 && (STACK_REG_P (crtl
->return_rtx
)));
3667 bool complex_return
= false;
3668 complex_return
= (crtl
->return_rtx
3669 && COMPLEX_MODE_P (GET_MODE (crtl
->return_rtx
)));
3671 if (return_with_x87
)
3679 rtx st_reg
= gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3680 for (unsigned int i
= 0; i
< num_of_st
; i
++)
3681 emit_insn (gen_rtx_SET (st_reg
, CONST0_RTX (XFmode
)));
3683 for (unsigned int i
= 0; i
< num_of_st
; i
++)
3686 insn
= emit_insn (gen_rtx_SET (st_reg
, st_reg
));
3687 add_reg_note (insn
, REG_DEAD
, st_reg
);
3693 /* When the routine exit in MMX mode, if any ST register needs
3694 to be zeroed, we should clear all MMX registers except the
3695 RET_MMX_REGNO that holds the return value. */
3697 zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs
,
3698 unsigned int ret_mmx_regno
)
3700 bool need_zero_all_mm
= false;
3701 for (unsigned int regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
3702 if (STACK_REGNO_P (regno
)
3703 && TEST_HARD_REG_BIT (need_zeroed_hardregs
, regno
))
3705 need_zero_all_mm
= true;
3709 if (!need_zero_all_mm
)
3712 rtx zero_mmx
= NULL_RTX
;
3713 machine_mode mode
= V4HImode
;
3714 for (unsigned int regno
= FIRST_MMX_REG
; regno
<= LAST_MMX_REG
; regno
++)
3715 if (regno
!= ret_mmx_regno
)
3717 rtx reg
= gen_rtx_REG (mode
, regno
);
3718 if (zero_mmx
== NULL_RTX
)
3721 emit_insn (gen_rtx_SET (reg
, CONST0_RTX (mode
)));
3724 emit_move_insn (reg
, zero_mmx
);
3729 /* TARGET_ZERO_CALL_USED_REGS. */
3730 /* Generate a sequence of instructions that zero registers specified by
3731 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
3734 ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs
)
3736 HARD_REG_SET zeroed_hardregs
;
3737 bool all_sse_zeroed
= false;
3738 bool all_st_zeroed
= false;
3739 bool all_mm_zeroed
= false;
3741 CLEAR_HARD_REG_SET (zeroed_hardregs
);
3743 /* first, let's see whether we can zero all vector registers together. */
3744 rtx zero_all_vec_insn
= zero_all_vector_registers (need_zeroed_hardregs
);
3745 if (zero_all_vec_insn
)
3747 emit_insn (zero_all_vec_insn
);
3748 all_sse_zeroed
= true;
3751 /* mm/st registers are shared registers set, we should follow the following
3752 rules to clear them:
3753 MMX exit mode x87 exit mode
3754 -------------|----------------------|---------------
3755 uses x87 reg | clear all MMX | clear all x87
3756 uses MMX reg | clear individual MMX | clear all x87
3757 x87 + MMX | clear all MMX | clear all x87
3759 first, we should decide which mode (MMX mode or x87 mode) the function
3762 bool exit_with_mmx_mode
= (crtl
->return_rtx
3763 && (MMX_REG_P (crtl
->return_rtx
)));
3765 if (!exit_with_mmx_mode
)
3766 /* x87 exit mode, we should zero all st registers together. */
3768 all_st_zeroed
= zero_all_st_registers (need_zeroed_hardregs
);
3770 SET_HARD_REG_BIT (zeroed_hardregs
, FIRST_STACK_REG
);
3773 /* MMX exit mode, check whether we can zero all mm registers. */
3775 unsigned int exit_mmx_regno
= REGNO (crtl
->return_rtx
);
3776 all_mm_zeroed
= zero_all_mm_registers (need_zeroed_hardregs
,
3779 for (unsigned int regno
= FIRST_MMX_REG
; regno
<= LAST_MMX_REG
; regno
++)
3780 if (regno
!= exit_mmx_regno
)
3781 SET_HARD_REG_BIT (zeroed_hardregs
, regno
);
3784 /* Now, generate instructions to zero all the other registers. */
3786 rtx zero_gpr
= NULL_RTX
;
3787 rtx zero_vector
= NULL_RTX
;
3788 rtx zero_mask
= NULL_RTX
;
3789 rtx zero_mmx
= NULL_RTX
;
3791 for (unsigned int regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
3793 if (!TEST_HARD_REG_BIT (need_zeroed_hardregs
, regno
))
3795 if (!zero_call_used_regno_p (regno
, all_sse_zeroed
,
3796 exit_with_mmx_mode
&& !all_mm_zeroed
))
3799 SET_HARD_REG_BIT (zeroed_hardregs
, regno
);
3801 rtx reg
, tmp
, zero_rtx
;
3802 machine_mode mode
= zero_call_used_regno_mode (regno
);
3804 reg
= gen_rtx_REG (mode
, regno
);
3805 zero_rtx
= CONST0_RTX (mode
);
3808 if (zero_gpr
== NULL_RTX
)
3811 tmp
= gen_rtx_SET (reg
, zero_rtx
);
3812 if (!TARGET_USE_MOV0
|| optimize_insn_for_size_p ())
3814 rtx clob
= gen_rtx_CLOBBER (VOIDmode
,
3815 gen_rtx_REG (CCmode
,
3817 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2,
3824 emit_move_insn (reg
, zero_gpr
);
3825 else if (mode
== V4SFmode
)
3826 if (zero_vector
== NULL_RTX
)
3829 tmp
= gen_rtx_SET (reg
, zero_rtx
);
3833 emit_move_insn (reg
, zero_vector
);
3834 else if (mode
== HImode
)
3835 if (zero_mask
== NULL_RTX
)
3838 tmp
= gen_rtx_SET (reg
, zero_rtx
);
3842 emit_move_insn (reg
, zero_mask
);
3843 else if (mode
== V4HImode
)
3844 if (zero_mmx
== NULL_RTX
)
3847 tmp
= gen_rtx_SET (reg
, zero_rtx
);
3851 emit_move_insn (reg
, zero_mmx
);
3855 return zeroed_hardregs
;
3858 /* Define how to find the value returned by a function.
3859 VALTYPE is the data type of the value (as a tree).
3860 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3861 otherwise, FUNC is 0. */
/* NOTE(review): this chunk is an elided extraction (line numbers fused in,
   many lines missing, e.g. the return type and the default regno = AX_REG
   assignment).  Code left byte-identical; comments only.  */
/* Pick the hard register in which a value of MODE/ORIG_MODE is returned
   under the 32-bit ABI.  FNTYPE/FN (either may be null) enable the
   sseregparm override for SFmode/DFmode returns.  */
3864 function_value_32 (machine_mode orig_mode
, machine_mode mode
,
3865 const_tree fntype
, const_tree fn
)
3869 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3870 we normally prevent this case when mmx is not available. However
3871 some ABIs may require the result to be returned like DImode. */
3872 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
3873 regno
= FIRST_MMX_REG
;
3875 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3876 we prevent this case when sse is not available. However some ABIs
3877 may require the result to be returned like integer TImode. */
3878 else if (mode
== TImode
3879 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
3880 regno
= FIRST_SSE_REG
;
3882 /* 32-byte vector modes in %ymm0. */
3883 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
3884 regno
= FIRST_SSE_REG
;
3886 /* 64-byte vector modes in %zmm0. */
3887 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 64)
3888 regno
= FIRST_SSE_REG
;
3890 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
3891 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
3892 regno
= FIRST_FLOAT_REG
;
3894 /* Most things go in %eax. */
/* NOTE(review): the "else regno = AX_REG" fallback appears to have been
   elided from this extraction — confirm against the original file.  */
3897 /* Override FP return register with %xmm0 for local functions when
3898 SSE math is enabled or for functions with sseregparm attribute. */
3899 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
3901 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
/* sse_level == -1 means the sseregparm request cannot be honoured.  */
3902 if (sse_level
== -1)
3904 error ("calling %qD with SSE calling convention without "
3905 "SSE/SSE2 enabled", fn
);
3906 sorry ("this is a GCC bug that can be worked around by adding "
3907 "attribute used to function called");
3909 else if ((sse_level
>= 1 && mode
== SFmode
)
3910 || (sse_level
== 2 && mode
== DFmode
))
3911 regno
= FIRST_SSE_REG
;
3914 /* OImode shouldn't be used directly. */
3915 gcc_assert (mode
!= OImode
);
3917 return gen_rtx_REG (orig_mode
, regno
);
/* NOTE(review): elided extraction — several lines of this function are
   missing (e.g. the final return of RET, and the branch conditions for the
   SSE/x87 cases).  Code left byte-identical; comments only.  */
/* Pick the return location for a value under the 64-bit SysV ABI;
   VALTYPE may be null for libcalls.  */
3921 function_value_64 (machine_mode orig_mode
, machine_mode mode
,
3926 /* Handle libcalls, which don't provide a type node. */
3927 if (valtype
== NULL
)
3941 regno
= FIRST_SSE_REG
;
3945 regno
= FIRST_FLOAT_REG
;
3953 return gen_rtx_REG (mode
, regno
);
3955 else if (POINTER_TYPE_P (valtype
))
3957 /* Pointers are always returned in word_mode. */
/* Classify the aggregate/scalar into registers per the psABI.  */
3961 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
3962 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
3963 x86_64_int_return_registers
, 0);
3965 /* For zero sized structures, construct_container returns NULL, but we
3966 need to keep rest of compiler happy by returning meaningful value. */
3968 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
/* NOTE(review): elided extraction; code left byte-identical, comments only.  */
/* 32-bit MS ABI return-value selection: x87 returns go to %st(0) except
   for small aggregates; everything else defers to function_value_32.  */
3974 function_value_ms_32 (machine_mode orig_mode
, machine_mode mode
,
3975 const_tree fntype
, const_tree fn
, const_tree valtype
)
3979 /* Floating point return values in %st(0)
3980 (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
3981 if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
3982 && (GET_MODE_SIZE (mode
) > 8
3983 || valtype
== NULL_TREE
|| !AGGREGATE_TYPE_P (valtype
)))
3985 regno
= FIRST_FLOAT_REG
;
3986 return gen_rtx_REG (orig_mode
, regno
);
3989 return function_value_32(orig_mode
, mode
, fntype
,fn
);
/* NOTE(review): elided extraction (case labels of the switch are missing);
   code left byte-identical, comments only.  */
/* 64-bit MS ABI return-value selection: defaults to %rax, with 16-byte
   vectors and scalar SF/DF aggregates diverted to %xmm0.  */
3993 function_value_ms_64 (machine_mode orig_mode
, machine_mode mode
,
3996 unsigned int regno
= AX_REG
;
4000 switch (GET_MODE_SIZE (mode
))
4003 if (valtype
!= NULL_TREE
4004 && !VECTOR_INTEGER_TYPE_P (valtype
)
/* NOTE(review): the next condition duplicates the previous line
   (!VECTOR_INTEGER_TYPE_P tested twice) — redundant but harmless;
   flagging rather than changing in a comment-only edit.  */
4005 && !VECTOR_INTEGER_TYPE_P (valtype
)
4006 && !INTEGRAL_TYPE_P (valtype
)
4007 && !VECTOR_FLOAT_TYPE_P (valtype
))
4009 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
4010 && !COMPLEX_MODE_P (mode
))
4011 regno
= FIRST_SSE_REG
;
4015 if (valtype
!= NULL_TREE
&& AGGREGATE_TYPE_P (valtype
))
4017 if (mode
== SFmode
|| mode
== DFmode
)
4018 regno
= FIRST_SSE_REG
;
4024 return gen_rtx_REG (orig_mode
, regno
);
/* NOTE(review): elided extraction; code left byte-identical, comments only.  */
/* Dispatch return-value computation to the right ABI-specific helper,
   based on the function type's ABI and TARGET_64BIT.  */
4028 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
4029 machine_mode orig_mode
, machine_mode mode
)
4031 const_tree fn
, fntype
;
/* FNTYPE_OR_DECL may be a FUNCTION_DECL or already a type.  */
4034 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
4035 fn
= fntype_or_decl
;
4036 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4038 if (ix86_function_type_abi (fntype
) == MS_ABI
)
4041 return function_value_ms_64 (orig_mode
, mode
, valtype
);
4043 return function_value_ms_32 (orig_mode
, mode
, fntype
, fn
, valtype
);
4045 else if (TARGET_64BIT
)
4046 return function_value_64 (orig_mode
, mode
, valtype
);
4048 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
/* NOTE(review): elided extraction; code left byte-identical, comments only.  */
/* Target hook: compute the RTX for a function's return value.  Uses the
   type's natural mode (vector reclassification) before dispatching.  */
4052 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
, bool)
4054 machine_mode mode
, orig_mode
;
4056 orig_mode
= TYPE_MODE (valtype
);
4057 mode
= type_natural_mode (valtype
, NULL
, true);
4058 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
4061 /* Pointer function arguments and return values are promoted to
4062 word_mode for normal functions. */
/* NOTE(review): elided extraction; code left byte-identical, comments only.
   For non-TYPE_NORMAL functions (e.g. interrupt handlers) the default
   promotion path below is taken instead.  */
4065 ix86_promote_function_mode (const_tree type
, machine_mode mode
,
4066 int *punsignedp
, const_tree fntype
,
4069 if (cfun
->machine
->func_type
== TYPE_NORMAL
4070 && type
!= NULL_TREE
4071 && POINTER_TYPE_P (type
))
/* Pointers extend unsigned when widened to word_mode.  */
4073 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
4076 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
4080 /* Return true if a structure, union or array with MODE containing FIELD
4081 should be accessed using BLKmode. */
/* NOTE(review): elided extraction (return type/braces missing); code left
   byte-identical, comments only.  */
4084 ix86_member_type_forces_blk (const_tree field
, machine_mode mode
)
4086 /* Union with XFmode must be in BLKmode. */
4087 return (mode
== XFmode
4088 && (TREE_CODE (DECL_FIELD_CONTEXT (field
)) == UNION_TYPE
4089 || TREE_CODE (DECL_FIELD_CONTEXT (field
)) == QUAL_UNION_TYPE
));
/* Target hook: return-value location for a libcall — no type node is
   available, so MODE is used for both the natural and original mode.
   NOTE(review): elided extraction; code left byte-identical.  */
4093 ix86_libcall_value (machine_mode mode
)
4095 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
4098 /* Return true iff type is returned in memory. */
/* NOTE(review): elided extraction — the ABI-branch structure (64-bit SysV
   vs MS vs 32-bit) is partially missing; code left byte-identical,
   comments only.  */
4101 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
4103 #ifdef SUBTARGET_RETURN_IN_MEMORY
4104 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
4106 const machine_mode mode
= type_natural_mode (type
, NULL
, true);
4111 if (ix86_function_type_abi (fntype
) == MS_ABI
)
4113 size
= int_size_in_bytes (type
);
4115 /* __m128 is returned in xmm0. */
4116 if ((!type
|| VECTOR_INTEGER_TYPE_P (type
)
4117 || INTEGRAL_TYPE_P (type
)
4118 || VECTOR_FLOAT_TYPE_P (type
))
4119 && (SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
4120 && !COMPLEX_MODE_P (mode
)
4121 && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
4124 /* Otherwise, the size must be exactly in [1248]. */
4125 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
/* SysV 64-bit: in memory iff examine_argument fails to classify.  */
4129 int needed_intregs
, needed_sseregs
;
4131 return examine_argument (mode
, type
, 1,
4132 &needed_intregs
, &needed_sseregs
);
4137 size
= int_size_in_bytes (type
);
4139 /* Intel MCU psABI returns scalars and aggregates no larger than 8
4140 bytes in registers. */
4142 return VECTOR_MODE_P (mode
) || size
< 0 || size
> 8;
4144 if (mode
== BLKmode
)
4147 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4150 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4152 /* User-created vectors small enough to fit in EAX. */
4156 /* Unless ABI prescibes otherwise,
4157 MMX/3dNow values are returned in MM0 if available. */
4160 return TARGET_VECT8_RETURNS
|| !TARGET_MMX
;
4162 /* SSE values are returned in XMM0 if available. */
4166 /* AVX values are returned in YMM0 if available. */
4170 /* AVX512F values are returned in ZMM0 if available. */
4172 return !TARGET_AVX512F
;
4181 /* OImode shouldn't be used directly. */
4182 gcc_assert (mode
!= OImode
);
4190 /* Create the va_list data type. */
/* NOTE(review): elided extraction (some field-type arguments missing);
   code left byte-identical, comments only.  Builds the SysV 64-bit
   __va_list_tag record {gp_offset, fp_offset, overflow_arg_area,
   reg_save_area} and returns it wrapped in a one-element array.  */
4193 ix86_build_builtin_va_list_64 (void)
4195 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4197 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
4198 type_decl
= build_decl (BUILTINS_LOCATION
,
4199 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4201 f_gpr
= build_decl (BUILTINS_LOCATION
,
4202 FIELD_DECL
, get_identifier ("gp_offset"),
4203 unsigned_type_node
);
4204 f_fpr
= build_decl (BUILTINS_LOCATION
,
4205 FIELD_DECL
, get_identifier ("fp_offset"),
4206 unsigned_type_node
);
4207 f_ovf
= build_decl (BUILTINS_LOCATION
,
4208 FIELD_DECL
, get_identifier ("overflow_arg_area"),
4210 f_sav
= build_decl (BUILTINS_LOCATION
,
4211 FIELD_DECL
, get_identifier ("reg_save_area"),
/* Remember the counter fields so pass_stdarg can track their use.  */
4214 va_list_gpr_counter_field
= f_gpr
;
4215 va_list_fpr_counter_field
= f_fpr
;
4217 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4218 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4219 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4220 DECL_FIELD_CONTEXT (f_sav
) = record
;
4222 TYPE_STUB_DECL (record
) = type_decl
;
4223 TYPE_NAME (record
) = type_decl
;
4224 TYPE_FIELDS (record
) = f_gpr
;
4225 DECL_CHAIN (f_gpr
) = f_fpr
;
4226 DECL_CHAIN (f_fpr
) = f_ovf
;
4227 DECL_CHAIN (f_ovf
) = f_sav
;
4229 layout_type (record
);
/* Tag the record so canonical_va_list_type can identify it after LTO
   type merging (see comment in ix86_build_builtin_va_list).  */
4231 TYPE_ATTRIBUTES (record
) = tree_cons (get_identifier ("sysv_abi va_list"),
4232 NULL_TREE
, TYPE_ATTRIBUTES (record
));
4234 /* The correct type is an array type of one element. */
4235 return build_array_type (record
, build_index_type (size_zero_node
))
4238 /* Setup the builtin va_list data type and for 64-bit the additional
4239 calling convention specific va_list data types. */
/* NOTE(review): elided extraction; code left byte-identical, comments only.  */
4242 ix86_build_builtin_va_list (void)
4246 /* Initialize ABI specific va_list builtin types.
4248 In lto1, we can encounter two va_list types:
4249 - one as a result of the type-merge across TUs, and
4250 - the one constructed here.
4251 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
4252 a type identity check in canonical_va_list_type based on
4253 TYPE_MAIN_VARIANT (which we used to have) will not work.
4254 Instead, we tag each va_list_type_node with its unique attribute, and
4255 look for the attribute in the type identity check in
4256 canonical_va_list_type.
4258 Tagging sysv_va_list_type_node directly with the attribute is
4259 problematic since it's a array of one record, which will degrade into a
4260 pointer to record when used as parameter (see build_va_arg comments for
4261 an example), dropping the attribute in the process. So we tag the
4264 /* For SYSV_ABI we use an array of one record. */
4265 sysv_va_list_type_node
= ix86_build_builtin_va_list_64 ();
4267 /* For MS_ABI we use plain pointer to argument area. */
4268 tree char_ptr_type
= build_pointer_type (char_type_node
);
4269 tree attr
= tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE
,
4270 TYPE_ATTRIBUTES (char_ptr_type
));
4271 ms_va_list_type_node
= build_type_attribute_variant (char_ptr_type
, attr
);
/* The default va_list type follows the default ABI.  */
4273 return ((ix86_abi
== MS_ABI
)
4274 ? ms_va_list_type_node
4275 : sysv_va_list_type_node
);
4279 /* For i386 we use plain pointer to argument area. */
4280 return build_pointer_type (char_type_node
);
4284 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* NOTE(review): elided extraction (local declarations and the SSE-save
   label emission are partially missing); code left byte-identical,
   comments only.  Emits the SysV 64-bit register save area: remaining
   named GPR args unconditionally, SSE regs guarded on %al != 0.  */
4287 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
4293 /* GPR size of varargs save area. */
4294 if (cfun
->va_list_gpr_size
)
4295 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
4297 ix86_varargs_gpr_size
= 0;
4299 /* FPR size of varargs save area. We don't need it if we don't pass
4300 anything in SSE registers. */
4301 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
4302 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
4304 ix86_varargs_fpr_size
= 0;
4306 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
/* Save area lives just above the frame; see also ix86_va_start.  */
4309 save_area
= frame_pointer_rtx
;
4310 set
= get_varargs_alias_set ();
4312 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4313 if (max
> X86_64_REGPARM_MAX
)
4314 max
= X86_64_REGPARM_MAX
;
4316 for (i
= cum
->regno
; i
< max
; i
++)
4318 mem
= gen_rtx_MEM (word_mode
,
4319 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
4320 MEM_NOTRAP_P (mem
) = 1;
4321 set_mem_alias_set (mem
, set
);
4322 emit_move_insn (mem
,
4323 gen_rtx_REG (word_mode
,
4324 x86_64_int_parameter_registers
[i
]));
4327 if (ix86_varargs_fpr_size
)
4330 rtx_code_label
*label
;
4333 /* Now emit code to save SSE registers. The AX parameter contains number
4334 of SSE parameter registers used to call this function, though all we
4335 actually check here is the zero/non-zero status. */
4337 label
= gen_label_rtx ();
4338 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
4339 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
4342 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4343 we used movdqa (i.e. TImode) instead? Perhaps even better would
4344 be if we could determine the real mode of the data, via a hook
4345 into pass_stdarg. Ignore all that for now. */
4347 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
4348 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
4350 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
4351 if (max
> X86_64_SSE_REGPARM_MAX
)
4352 max
= X86_64_SSE_REGPARM_MAX
;
4354 for (i
= cum
->sse_regno
; i
< max
; ++i
)
/* SSE slots follow the GPR area: 16 bytes per register.  */
4356 mem
= plus_constant (Pmode
, save_area
,
4357 i
* 16 + ix86_varargs_gpr_size
);
4358 mem
= gen_rtx_MEM (smode
, mem
);
4359 MEM_NOTRAP_P (mem
) = 1;
4360 set_mem_alias_set (mem
, set
);
4361 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
4363 emit_move_insn (mem
, gen_rtx_REG (smode
, GET_SSE_REGNO (i
)));
/* NOTE(review): elided extraction; code left byte-identical, comments only.
   MS 64-bit ABI: spill the remaining named-register args to their
   caller-allocated home slots in the incoming argument area.  */
4371 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
4373 alias_set_type set
= get_varargs_alias_set ();
4376 /* Reset to zero, as there might be a sysv vaarg used
4378 ix86_varargs_gpr_size
= 0;
4379 ix86_varargs_fpr_size
= 0;
4381 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
4385 mem
= gen_rtx_MEM (Pmode
,
4386 plus_constant (Pmode
, virtual_incoming_args_rtx
,
4387 i
* UNITS_PER_WORD
))
4388 MEM_NOTRAP_P (mem
) = 1;
4389 set_mem_alias_set (mem
, set
);
4391 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
4392 emit_move_insn (mem
, reg
);
/* NOTE(review): elided extraction; code left byte-identical, comments only.
   Target hook entry point: advances past the last named argument (for
   stdarg functions) then dispatches to the MS or SysV 64-bit worker.  */
4397 ix86_setup_incoming_varargs (cumulative_args_t cum_v
,
4398 const function_arg_info
&arg
,
4401 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
4402 CUMULATIVE_ARGS next_cum
;
4405 /* This argument doesn't appear to be used anymore. Which is good,
4406 because the old code here didn't suppress rtl generation. */
4407 gcc_assert (!no_rtl
);
4412 fntype
= TREE_TYPE (current_function_decl
);
4414 /* For varargs, we do not want to skip the dummy va_dcl argument.
4415 For stdargs, we do want to skip the last named argument. */
4417 if (stdarg_p (fntype
))
4418 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), arg
);
4420 if (cum
->call_abi
== MS_ABI
)
4421 setup_incoming_varargs_ms_64 (&next_cum
);
4423 setup_incoming_varargs_64 (&next_cum
);
4426 /* Checks if TYPE is of kind va_list char *. */
/* NOTE(review): elided extraction; code left byte-identical, comments only.  */
4429 is_va_list_char_pointer (tree type
)
4433 /* For 32-bit it is always true. */
4436 canonic
= ix86_canonical_va_list_type (type
);
4437 return (canonic
== ms_va_list_type_node
4438 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
4441 /* Implement va_start. */
/* NOTE(review): elided extraction — braces, some declarations and the
   COMPONENT_REF field arguments are missing; code left byte-identical,
   comments only.  Initializes the four va_list fields (gp_offset,
   fp_offset, overflow_arg_area, reg_save_area) for the SysV 64-bit ABI,
   with special handling for -fsplit-stack and char* va_lists.  */
4444 ix86_va_start (tree valist
, rtx nextarg
)
4446 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4447 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4448 tree gpr
, fpr
, ovf
, sav
, t
;
4452 if (flag_split_stack
4453 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
4455 unsigned int scratch_regno
;
4457 /* When we are splitting the stack, we can't refer to the stack
4458 arguments using internal_arg_pointer, because they may be on
4459 the old stack. The split stack prologue will arrange to
4460 leave a pointer to the old stack arguments in a scratch
4461 register, which we here copy to a pseudo-register. The split
4462 stack prologue can't set the pseudo-register directly because
4463 it (the prologue) runs before any registers have been saved. */
4465 scratch_regno
= split_stack_prologue_scratch_regno ();
4466 if (scratch_regno
!= INVALID_REGNUM
)
4471 reg
= gen_reg_rtx (Pmode
);
4472 cfun
->machine
->split_stack_varargs_pointer
= reg
;
4475 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
/* Splice the copy insn right after the function entry point.  */
4479 push_topmost_sequence ();
4480 emit_insn_after (seq
, entry_of_function ());
4481 pop_topmost_sequence ();
4485 /* Only 64bit target needs something special. */
4486 if (is_va_list_char_pointer (TREE_TYPE (valist
)))
4488 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
4489 std_expand_builtin_va_start (valist
, nextarg
);
4494 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
4495 next
= expand_binop (ptr_mode
, add_optab
,
4496 cfun
->machine
->split_stack_varargs_pointer
,
4497 crtl
->args
.arg_offset_rtx
,
4498 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
4499 convert_move (va_r
, next
, 0);
/* SysV 64-bit path: locate the four fields of __va_list_tag.  */
4504 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
4505 f_fpr
= DECL_CHAIN (f_gpr
);
4506 f_ovf
= DECL_CHAIN (f_fpr
);
4507 f_sav
= DECL_CHAIN (f_ovf
);
4509 valist
= build_simple_mem_ref (valist
);
4510 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
4511 /* The following should be folded into the MEM_REF offset. */
4512 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
4514 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
4516 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
4518 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
4521 /* Count number of gp and fp argument registers used. */
4522 words
= crtl
->args
.info
.words
;
4523 n_gpr
= crtl
->args
.info
.regno
;
4524 n_fpr
= crtl
->args
.info
.sse_regno
;
4526 if (cfun
->va_list_gpr_size
)
/* gp_offset = n_gpr * 8 (consumed GPR bytes in the save area).  */
4528 type
= TREE_TYPE (gpr
);
4529 t
= build2 (MODIFY_EXPR
, type
,
4530 gpr
, build_int_cst (type
, n_gpr
* 8));
4531 TREE_SIDE_EFFECTS (t
) = 1;
4532 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4535 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
/* fp_offset starts past the full GPR area (8 * REGPARM_MAX).  */
4537 type
= TREE_TYPE (fpr
);
4538 t
= build2 (MODIFY_EXPR
, type
, fpr
,
4539 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
4540 TREE_SIDE_EFFECTS (t
) = 1;
4541 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4544 /* Find the overflow area. */
4545 type
= TREE_TYPE (ovf
);
4546 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
4547 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
4549 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
4550 t
= make_tree (type
, ovf_rtx
);
4552 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
4554 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
4555 TREE_SIDE_EFFECTS (t
) = 1;
4556 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4558 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
4560 /* Find the register save area.
4561 Prologue of the function save it right above stack frame. */
4562 type
= TREE_TYPE (sav
);
4563 t
= make_tree (type
, frame_pointer_rtx
);
4564 if (!ix86_varargs_gpr_size
)
/* No GPR area saved: bias back so fp_offset still indexes correctly.  */
4565 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
4567 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
4568 TREE_SIDE_EFFECTS (t
) = 1;
4569 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4573 /* Implement va_arg. */
/* NOTE(review): elided extraction — braces, several conditions (e.g. the
   indirect_p guard before build_pointer_type) and local declarations are
   missing; code left byte-identical, comments only.  Gimplifies a
   va_arg read: try the register save area first (guarded by gp_offset/
   fp_offset limit checks), else fall back to the aligned overflow area.  */
4576 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
4579 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4580 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4581 tree gpr
, fpr
, ovf
, sav
, t
;
4583 tree lab_false
, lab_over
= NULL_TREE
;
4588 machine_mode nat_mode
;
4589 unsigned int arg_boundary
;
4590 unsigned int type_align
;
4592 /* Only 64bit target needs something special. */
4593 if (is_va_list_char_pointer (TREE_TYPE (valist
)))
4594 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4596 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
4597 f_fpr
= DECL_CHAIN (f_gpr
);
4598 f_ovf
= DECL_CHAIN (f_fpr
);
4599 f_sav
= DECL_CHAIN (f_ovf
);
4601 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
4602 valist
, f_gpr
, NULL_TREE
);
4604 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4605 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4606 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4608 indirect_p
= pass_va_arg_by_reference (type
);
/* NOTE(review): in context this assignment is presumably guarded by
   "if (indirect_p)" (line elided) — confirm against the original.  */
4610 type
= build_pointer_type (type
);
4611 size
= arg_int_size_in_bytes (type
);
4612 rsize
= CEIL (size
, UNITS_PER_WORD
);
4614 nat_mode
= type_natural_mode (type
, NULL
, false);
4629 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
4630 if (!TARGET_64BIT_MS_ABI
)
4638 container
= construct_container (nat_mode
, TYPE_MODE (type
),
4639 type
, 0, X86_64_REGPARM_MAX
,
4640 X86_64_SSE_REGPARM_MAX
, intreg
,
4645 /* Pull the value out of the saved registers. */
4647 addr
= create_tmp_var (ptr_type_node
, "addr");
4648 type_align
= TYPE_ALIGN (type
);
4652 int needed_intregs
, needed_sseregs
;
4654 tree int_addr
, sse_addr
;
4656 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
4657 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
4659 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
/* A temporary is needed when the registers don't form one contiguous
   run in the save area, or alignment exceeds what the area offers.  */
4661 need_temp
= (!REG_P (container
)
4662 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4663 || TYPE_ALIGN (type
) > 128));
4665 /* In case we are passing structure, verify that it is consecutive block
4666 on the register save area. If not we need to do moves. */
4667 if (!need_temp
&& !REG_P (container
))
4669 /* Verify that all registers are strictly consecutive */
4670 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4674 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4676 rtx slot
= XVECEXP (container
, 0, i
);
4677 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4678 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4686 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4688 rtx slot
= XVECEXP (container
, 0, i
);
4689 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4690 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4702 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4703 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4706 /* First ensure that we fit completely in registers. */
4709 t
= build_int_cst (TREE_TYPE (gpr
),
4710 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
4711 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4712 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4713 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4714 gimplify_and_add (t
, pre_p
);
4718 t
= build_int_cst (TREE_TYPE (fpr
),
4719 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4720 + X86_64_REGPARM_MAX
* 8);
4721 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4722 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4723 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4724 gimplify_and_add (t
, pre_p
);
4727 /* Compute index to start of area used for integer regs. */
4730 /* int_addr = gpr + sav; */
4731 t
= fold_build_pointer_plus (sav
, gpr
);
4732 gimplify_assign (int_addr
, t
, pre_p
);
4736 /* sse_addr = fpr + sav; */
4737 t
= fold_build_pointer_plus (sav
, fpr
);
4738 gimplify_assign (sse_addr
, t
, pre_p
);
/* need_temp path: copy piece by piece into a stack temporary.  */
4742 int i
, prev_size
= 0;
4743 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4746 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4747 gimplify_assign (addr
, t
, pre_p
);
4749 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4751 rtx slot
= XVECEXP (container
, 0, i
);
4752 rtx reg
= XEXP (slot
, 0);
4753 machine_mode mode
= GET_MODE (reg
);
4759 tree dest_addr
, dest
;
4760 int cur_size
= GET_MODE_SIZE (mode
);
4762 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
4763 prev_size
= INTVAL (XEXP (slot
, 1));
4764 if (prev_size
+ cur_size
> size
)
/* Trailing piece: shrink to the type's remaining bytes.  */
4766 cur_size
= size
- prev_size
;
4767 unsigned int nbits
= cur_size
* BITS_PER_UNIT
;
4768 if (!int_mode_for_size (nbits
, 1).exists (&mode
))
4771 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4772 if (mode
== GET_MODE (reg
))
4773 addr_type
= build_pointer_type (piece_type
);
4775 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
4777 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
4780 if (SSE_REGNO_P (REGNO (reg
)))
4782 src_addr
= sse_addr
;
4783 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4787 src_addr
= int_addr
;
4788 src_offset
= REGNO (reg
) * 8;
4790 src_addr
= fold_convert (addr_type
, src_addr
);
4791 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
4793 dest_addr
= fold_convert (daddr_type
, addr
);
4794 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
4795 if (cur_size
== GET_MODE_SIZE (mode
))
4797 src
= build_va_arg_indirect_ref (src_addr
);
4798 dest
= build_va_arg_indirect_ref (dest_addr
);
4800 gimplify_assign (dest
, src
, pre_p
);
/* Partial piece: fall back to a memcpy of cur_size bytes.  */
4805 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
4806 3, dest_addr
, src_addr
,
4807 size_int (cur_size
));
4808 gimplify_and_add (copy
, pre_p
);
4810 prev_size
+= cur_size
;
/* Bump the offsets past the registers just consumed.  */
4816 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4817 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4818 gimplify_assign (gpr
, t
, pre_p
);
4819 /* The GPR save area guarantees only 8-byte alignment. */
4821 type_align
= MIN (type_align
, 64);
4826 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4827 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4828 gimplify_assign (unshare_expr (fpr
), t
, pre_p
);
4831 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
4833 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
4836 /* ... otherwise out of the overflow area. */
4838 /* When we align parameter on stack for caller, if the parameter
4839 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
4840 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
4841 here with caller. */
4842 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
4843 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
4844 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
4846 /* Care for on-stack alignment if needed. */
4847 if (arg_boundary
<= 64 || size
== 0)
4851 HOST_WIDE_INT align
= arg_boundary
/ 8;
4852 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
4853 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4854 build_int_cst (TREE_TYPE (t
), -align
));
4857 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4858 gimplify_assign (addr
, t
, pre_p
);
4860 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
4861 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
4864 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
4866 type
= build_aligned_type (type
, type_align
);
4867 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
4868 addr
= fold_convert (ptrtype
, addr
);
4871 addr
= build_va_arg_indirect_ref (addr
);
4872 return build_va_arg_indirect_ref (addr
);
4875 /* Return true if OPNUM's MEM should be matched
4876 in movabs* patterns. */
/* NOTE(review): elided extraction; code left byte-identical, comments only.  */
4879 ix86_check_movabs (rtx insn
, int opnum
)
4883 set
= PATTERN (insn
);
/* A PARALLEL's first element carries the SET of interest.  */
4884 if (GET_CODE (set
) == PARALLEL
)
4885 set
= XVECEXP (set
, 0, 0);
4886 gcc_assert (GET_CODE (set
) == SET
);
4887 mem
= XEXP (set
, opnum
);
4888 while (SUBREG_P (mem
))
4889 mem
= SUBREG_REG (mem
);
4890 gcc_assert (MEM_P (mem
));
4891 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
4894 /* Return false if INSN contains a MEM with a non-default address space. */
/* NOTE(review): elided extraction (the x binding and return statements are
   missing); code left byte-identical, comments only.  */
4896 ix86_check_no_addr_space (rtx insn
)
4898 subrtx_var_iterator::array_type array
;
4899 FOR_EACH_SUBRTX_VAR (iter
, array
, PATTERN (insn
), ALL
)
4902 if (MEM_P (x
) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x
)))
4908 /* Initialize the table of extra 80387 mathematical constants. */
/* NOTE(review): elided extraction; code left byte-identical, comments only.
   Populates ext_80387_constants_table with the five values loadable by
   the x87 fldlg2/fldln2/fldl2e/fldl2t/fldpi instructions.  */
4911 init_ext_80387_constants (void)
4913 static const char * cst
[5] =
4915 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4916 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4917 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4918 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4919 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4923 for (i
= 0; i
< 5; i
++)
4925 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
4926 /* Ensure each constant is rounded to XFmode precision. */
4927 real_convert (&ext_80387_constants_table
[i
],
4928 XFmode
, &ext_80387_constants_table
[i
]);
/* Remember the table is initialized so it is built only once.  */
4931 ext_80387_constants_init
= 1;
4934 /* Return non-zero if the constant is something that
4935 can be loaded with a special instruction. */
/* NOTE(review): elided extraction (the concrete return values for each
   case are missing); code left byte-identical, comments only.  */
4938 standard_80387_constant_p (rtx x
)
4940 machine_mode mode
= GET_MODE (x
);
4942 const REAL_VALUE_TYPE
*r
;
4944 if (!(CONST_DOUBLE_P (x
) && X87_FLOAT_MODE_P (mode
)))
/* 0.0 and 1.0 map to fldz/fld1.  */
4947 if (x
== CONST0_RTX (mode
))
4949 if (x
== CONST1_RTX (mode
))
4952 r
= CONST_DOUBLE_REAL_VALUE (x
);
4954 /* For XFmode constants, try to find a special 80387 instruction when
4955 optimizing for size or on those CPUs that benefit from them. */
4957 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
4961 if (! ext_80387_constants_init
)
4962 init_ext_80387_constants ();
4964 for (i
= 0; i
< 5; i
++)
4965 if (real_identical (r
, &ext_80387_constants_table
[i
]))
4969 /* Load of the constant -0.0 or -1.0 will be split as
4970 fldz;fchs or fld1;fchs sequence. */
4971 if (real_isnegzero (r
))
4973 if (real_identical (r
, &dconstm1
))
4979 /* Return the opcode of the special instruction to be used to load
/* NOTE(review): elided extraction — the switch cases mapping each
   standard_80387_constant_p value to its fld* mnemonic are missing;
   code left byte-identical, comments only.  */
4983 standard_80387_constant_opcode (rtx x
)
4985 switch (standard_80387_constant_p (x
))
5009 /* Return the CONST_DOUBLE representing the 80387 constant that is
5010 loaded by the specified special instruction. The argument IDX
5011 matches the return value from standard_80387_constant_p. */
/* NOTE(review): elided extraction (the idx -> i mapping is missing);
   code left byte-identical, comments only.  */
5014 standard_80387_constant_rtx (int idx
)
5018 if (! ext_80387_constants_init
)
5019 init_ext_80387_constants ();
5035 return const_double_from_real_value (ext_80387_constants_table
[i
],
5039 /* Return 1 if X is all bits 0 and 2 if X is all bits 1
5040 in supported SSE/AVX vector mode. */
/* NOTE(review): elided extraction (the return statements and the size
   switch bodies are missing); code left byte-identical, comments only.  */
5043 standard_sse_constant_p (rtx x
, machine_mode pred_mode
)
5050 mode
= GET_MODE (x
);
5052 if (x
== const0_rtx
|| const0_operand (x
, mode
))
5055 if (x
== constm1_rtx
|| vector_all_ones_operand (x
, mode
))
5057 /* VOIDmode integer constant, get mode from the predicate. */
5058 if (mode
== VOIDmode
)
5061 switch (GET_MODE_SIZE (mode
))
5086 /* Return the opcode of the special instruction to be used to load
5087 the constant operands[1] into operands[0]. */
/* NOTE(review): elided extraction (mode-attribute case labels and the
   final gcc_unreachable paths are missing); code left byte-identical,
   comments only.  All-zeros loads use xor-style idioms; all-ones loads
   use pcmpeqd/vpternlogd depending on the register class.  */
5090 standard_sse_constant_opcode (rtx_insn
*insn
, rtx
*operands
)
5093 rtx x
= operands
[1];
5095 gcc_assert (TARGET_SSE
);
5097 mode
= GET_MODE (x
);
5099 if (x
== const0_rtx
|| const0_operand (x
, mode
))
5101 switch (get_attr_mode (insn
))
5104 if (!EXT_REX_SSE_REG_P (operands
[0]))
5105 return "%vpxor\t%0, %d0";
5109 if (EXT_REX_SSE_REG_P (operands
[0]))
/* xmm16+ need EVEX-encoded forms; without AVX512VL use the zmm view.  */
5110 return (TARGET_AVX512VL
5111 ? "vpxord\t%x0, %x0, %x0"
5112 : "vpxord\t%g0, %g0, %g0");
5113 return "vpxor\t%x0, %x0, %x0";
5116 if (!EXT_REX_SSE_REG_P (operands
[0]))
5117 return "%vxorpd\t%0, %d0";
5121 if (!EXT_REX_SSE_REG_P (operands
[0]))
5122 return "vxorpd\t%x0, %x0, %x0";
5123 else if (TARGET_AVX512DQ
)
5124 return (TARGET_AVX512VL
5125 ? "vxorpd\t%x0, %x0, %x0"
5126 : "vxorpd\t%g0, %g0, %g0");
5128 return (TARGET_AVX512VL
5129 ? "vpxorq\t%x0, %x0, %x0"
5130 : "vpxorq\t%g0, %g0, %g0");
5133 if (!EXT_REX_SSE_REG_P (operands
[0]))
5134 return "%vxorps\t%0, %d0";
5138 if (!EXT_REX_SSE_REG_P (operands
[0]))
5139 return "vxorps\t%x0, %x0, %x0";
5140 else if (TARGET_AVX512DQ
)
5141 return (TARGET_AVX512VL
5142 ? "vxorps\t%x0, %x0, %x0"
5143 : "vxorps\t%g0, %g0, %g0");
5145 return (TARGET_AVX512VL
5146 ? "vpxord\t%x0, %x0, %x0"
5147 : "vpxord\t%g0, %g0, %g0");
5153 else if (x
== constm1_rtx
|| vector_all_ones_operand (x
, mode
))
5155 enum attr_mode insn_mode
= get_attr_mode (insn
);
5162 gcc_assert (TARGET_AVX512F
);
5163 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5168 gcc_assert (TARGET_AVX2
);
5173 gcc_assert (TARGET_SSE2
);
5174 if (!EXT_REX_SSE_REG_P (operands
[0]))
5176 ? "vpcmpeqd\t%0, %0, %0"
5177 : "pcmpeqd\t%0, %0");
5178 else if (TARGET_AVX512VL
)
5179 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
5181 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5191 /* Returns true if INSN can be transformed from a memory load
5192 to a supported FP constant load. */
/* NOTE(review): elided extraction (the leading !src test and the return
   statements are missing); code left byte-identical, comments only.  */
5195 ix86_standard_x87sse_constant_load_p (const rtx_insn
*insn
, rtx dst
)
5197 rtx src
= find_constant_src (insn
);
5199 gcc_assert (REG_P (dst
));
/* Reject when the destination class has no special-load for SRC.  */
5202 || (SSE_REGNO_P (REGNO (dst
))
5203 && standard_sse_constant_p (src
, GET_MODE (dst
)) != 1)
5204 || (STACK_REGNO_P (REGNO (dst
))
5205 && standard_80387_constant_p (src
) < 1))
5211 /* Predicate for pre-reload splitters with associated instructions,
5212 which can match any time before the split1 pass (usually combine),
5213 then are unconditionally split in that pass and should not be
5214 matched again afterwards. */
/* NOTE(review): elided extraction (return type/braces missing); code left
   byte-identical, comments only.  */
5217 ix86_pre_reload_split (void)
5219 return (can_create_pseudo_p ()
5220 && !(cfun
->curr_properties
& PROP_rtl_split_insns
));
5223 /* Return the opcode of the TYPE_SSEMOV instruction. To move from
5224 or to xmm16-xmm31/ymm16-ymm31 registers, we either require
5225 TARGET_AVX512VL or it is a register to register move which can
5226 be done with zmm register move. */
/* NOTE(review): elided extraction — the mode-classification switch labels,
   the alternative opcode strings for the misaligned int cases, the error
   path and the tail of the function are missing; code left byte-identical,
   comments only.  Selects a vmovdq*/vmovaps-family mnemonic from the
   operand mode/size/alignment and formats it into BUF.  */
5229 ix86_get_ssemov (rtx
*operands
, unsigned size
,
5230 enum attr_mode insn_mode
, machine_mode mode
)
5233 bool misaligned_p
= (misaligned_operand (operands
[0], mode
)
5234 || misaligned_operand (operands
[1], mode
));
/* evex_reg_p: any operand forces an EVEX encoding (zmm or xmm16+).  */
5235 bool evex_reg_p
= (size
== 64
5236 || EXT_REX_SSE_REG_P (operands
[0])
5237 || EXT_REX_SSE_REG_P (operands
[1]));
5238 machine_mode scalar_mode
;
5240 const char *opcode
= NULL
;
5246 } type
= opcode_int
;
5253 scalar_mode
= E_SFmode
;
5254 type
= opcode_float
;
5259 scalar_mode
= E_DFmode
;
5260 type
= opcode_double
;
5265 scalar_mode
= GET_MODE_INNER (mode
);
5271 /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
5272 we can only use zmm register move without memory operand. */
5275 && GET_MODE_SIZE (mode
) < 64)
5277 /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
5278 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
5279 AVX512VL is disabled, LRA can still generate reg to
5280 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
5282 if (memory_operand (operands
[0], mode
)
5283 || memory_operand (operands
[1], mode
))
5289 opcode
= misaligned_p
? "vmovdqu32" : "vmovdqa32";
5292 opcode
= misaligned_p
? "vmovups" : "vmovaps";
5295 opcode
= misaligned_p
? "vmovupd" : "vmovapd";
5299 else if (SCALAR_FLOAT_MODE_P (scalar_mode
))
5301 switch (scalar_mode
)
5304 opcode
= misaligned_p
? "%vmovups" : "%vmovaps";
5307 opcode
= misaligned_p
? "%vmovupd" : "%vmovapd";
5311 opcode
= misaligned_p
? "vmovdqu64" : "vmovdqa64";
5313 opcode
= misaligned_p
? "%vmovdqu" : "%vmovdqa";
5319 else if (SCALAR_INT_MODE_P (scalar_mode
))
5321 switch (scalar_mode
)
5325 opcode
= (misaligned_p
5331 opcode
= (misaligned_p
5339 opcode
= (misaligned_p
5345 opcode
= (misaligned_p
5353 opcode
= misaligned_p
? "vmovdqu32" : "vmovdqa32";
5355 opcode
= misaligned_p
? "%vmovdqu" : "%vmovdqa";
5361 opcode
= misaligned_p
? "vmovdqu64" : "vmovdqa64";
5363 opcode
= misaligned_p
? "%vmovdqu" : "%vmovdqa";
5366 opcode
= misaligned_p
? "vmovdqu64" : "vmovdqa64";
/* %g/%t/%x print the zmm/ymm/xmm view of the register operand.  */
5378 snprintf (buf
, sizeof (buf
), "%s\t{%%g1, %%g0|%%g0, %%g1}",
5382 snprintf (buf
, sizeof (buf
), "%s\t{%%t1, %%t0|%%t0, %%t1}",
5386 snprintf (buf
, sizeof (buf
), "%s\t{%%x1, %%x0|%%x0, %%x1}",
5392 output_asm_insn (buf
, operands
);
5396 /* Return the template of the TYPE_SSEMOV instruction to move
5397 operands[1] into operands[0]. */
5400 ix86_output_ssemov (rtx_insn
*insn
, rtx
*operands
)
5402 machine_mode mode
= GET_MODE (operands
[0]);
5403 if (get_attr_type (insn
) != TYPE_SSEMOV
5404 || mode
!= GET_MODE (operands
[1]))
5407 enum attr_mode insn_mode
= get_attr_mode (insn
);
5414 return ix86_get_ssemov (operands
, 64, insn_mode
, mode
);
5419 return ix86_get_ssemov (operands
, 32, insn_mode
, mode
);
5424 return ix86_get_ssemov (operands
, 16, insn_mode
, mode
);
5427 /* Handle broken assemblers that require movd instead of movq. */
5428 if (!HAVE_AS_IX86_INTERUNIT_MOVQ
5429 && (GENERAL_REG_P (operands
[0])
5430 || GENERAL_REG_P (operands
[1])))
5431 return "%vmovd\t{%1, %0|%0, %1}";
5433 return "%vmovq\t{%1, %0|%0, %1}";
5436 return "%vmovd\t{%1, %0|%0, %1}";
5439 if (TARGET_AVX
&& REG_P (operands
[0]) && REG_P (operands
[1]))
5440 return "vmovsd\t{%d1, %0|%0, %d1}";
5442 return "%vmovsd\t{%1, %0|%0, %1}";
5445 if (TARGET_AVX
&& REG_P (operands
[0]) && REG_P (operands
[1]))
5446 return "vmovss\t{%d1, %0|%0, %d1}";
5448 return "%vmovss\t{%1, %0|%0, %1}";
5451 gcc_assert (!TARGET_AVX
);
5452 return "movlpd\t{%1, %0|%0, %1}";
5455 if (TARGET_AVX
&& REG_P (operands
[0]))
5456 return "vmovlps\t{%1, %d0|%d0, %1}";
5458 return "%vmovlps\t{%1, %0|%0, %1}";
5465 /* Returns true if OP contains a symbol reference */
5468 symbolic_reference_mentioned_p (rtx op
)
5473 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
5476 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
5477 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
5483 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
5484 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
5488 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
5495 /* Return true if it is appropriate to emit `ret' instructions in the
5496 body of a function. Do this only if the epilogue is simple, needing a
5497 couple of insns. Prior to reloading, we can't tell how many registers
5498 must be saved, so return false then. Return false if there is no frame
5499 marker to de-allocate. */
5502 ix86_can_use_return_insn_p (void)
5504 if (ix86_function_naked (current_function_decl
))
5507 /* Don't use `ret' instruction in interrupt handler. */
5508 if (! reload_completed
5509 || frame_pointer_needed
5510 || cfun
->machine
->func_type
!= TYPE_NORMAL
)
5513 /* Don't allow more than 32k pop, since that's all we can do
5514 with one instruction. */
5515 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
5518 struct ix86_frame
&frame
= cfun
->machine
->frame
;
5519 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
5520 && (frame
.nregs
+ frame
.nsseregs
) == 0);
5523 /* Return stack frame size. get_frame_size () returns used stack slots
5524 during compilation, which may be optimized out later. If stack frame
5525 is needed, stack_frame_required should be true. */
5527 static HOST_WIDE_INT
5528 ix86_get_frame_size (void)
5530 if (cfun
->machine
->stack_frame_required
)
5531 return get_frame_size ();
5536 /* Value should be nonzero if functions must have frame pointers.
5537 Zero means the frame pointer need not be set up (and parms may
5538 be accessed via the stack pointer) in functions that seem suitable. */
5541 ix86_frame_pointer_required (void)
5543 /* If we accessed previous frames, then the generated code expects
5544 to be able to access the saved ebp value in our frame. */
5545 if (cfun
->machine
->accesses_prev_frame
)
5548 /* Several x86 os'es need a frame pointer for other reasons,
5549 usually pertaining to setjmp. */
5550 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5553 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
5554 if (TARGET_32BIT_MS_ABI
&& cfun
->calls_setjmp
)
5557 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
5558 allocation is 4GB. */
5559 if (TARGET_64BIT_MS_ABI
&& ix86_get_frame_size () > SEH_MAX_FRAME_SIZE
)
5562 /* SSE saves require frame-pointer when stack is misaligned. */
5563 if (TARGET_64BIT_MS_ABI
&& ix86_incoming_stack_boundary
< 128)
5566 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
5567 turns off the frame pointer by default. Turn it back on now if
5568 we've not got a leaf function. */
5569 if (TARGET_OMIT_LEAF_FRAME_POINTER
5571 || ix86_current_function_calls_tls_descriptor
))
5574 /* Several versions of mcount for the x86 assumes that there is a
5575 frame, so we cannot allow profiling without a frame pointer. */
5576 if (crtl
->profile
&& !flag_fentry
)
5582 /* Record that the current function accesses previous call frames. */
5585 ix86_setup_frame_addresses (void)
5587 cfun
->machine
->accesses_prev_frame
= 1;
#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif

/* Label count for call and return thunks.  It is used to make unique
   labels in call and return thunks.  */
static int indirectlabelno;

/* True if call thunk function is needed.  */
static bool indirect_thunk_needed = false;

/* Bit masks of integer registers, which contain branch target, used
   by call thunk functions.  */
static int indirect_thunks_used;

/* True if return thunk function is needed.  */
static bool indirect_return_needed = false;

/* True if return thunk function via CX is needed.  */
static bool indirect_return_via_cx;

#ifndef INDIRECT_LABEL
# define INDIRECT_LABEL "LIND"
#endif
/* Indicate what prefix is needed for an indirect branch.  */
enum indirect_thunk_prefix
{
  indirect_thunk_prefix_none,
  indirect_thunk_prefix_nt
};
5628 enum indirect_thunk_prefix
5629 indirect_thunk_need_prefix (rtx_insn
*insn
)
5631 enum indirect_thunk_prefix need_prefix
;
5632 if ((cfun
->machine
->indirect_branch_type
5633 == indirect_branch_thunk_extern
)
5634 && ix86_notrack_prefixed_insn_p (insn
))
5636 /* NOTRACK prefix is only used with external thunk so that it
5637 can be properly updated to support CET at run-time. */
5638 need_prefix
= indirect_thunk_prefix_nt
;
5641 need_prefix
= indirect_thunk_prefix_none
;
5645 /* Fills in the label name that should be used for the indirect thunk. */
5648 indirect_thunk_name (char name
[32], unsigned int regno
,
5649 enum indirect_thunk_prefix need_prefix
,
5652 if (regno
!= INVALID_REGNUM
&& regno
!= CX_REG
&& ret_p
)
5655 if (USE_HIDDEN_LINKONCE
)
5659 if (need_prefix
== indirect_thunk_prefix_nt
5660 && regno
!= INVALID_REGNUM
)
5662 /* NOTRACK prefix is only used with external thunk via
5663 register so that NOTRACK prefix can be added to indirect
5664 branch via register to support CET at run-time. */
5670 const char *ret
= ret_p
? "return" : "indirect";
5672 if (regno
!= INVALID_REGNUM
)
5674 const char *reg_prefix
;
5675 if (LEGACY_INT_REGNO_P (regno
))
5676 reg_prefix
= TARGET_64BIT
? "r" : "e";
5679 sprintf (name
, "__x86_%s_thunk%s_%s%s",
5680 ret
, prefix
, reg_prefix
, reg_names
[regno
]);
5683 sprintf (name
, "__x86_%s_thunk%s", ret
, prefix
);
5687 if (regno
!= INVALID_REGNUM
)
5688 ASM_GENERATE_INTERNAL_LABEL (name
, "LITR", regno
);
5692 ASM_GENERATE_INTERNAL_LABEL (name
, "LRT", 0);
5694 ASM_GENERATE_INTERNAL_LABEL (name
, "LIT", 0);
5699 /* Output a call and return thunk for indirect branch. If REGNO != -1,
5700 the function address is in REGNO and the call and return thunk looks like:
5711 Otherwise, the function address is on the top of stack and the
5712 call and return thunk looks like:
5720 lea WORD_SIZE(%sp), %sp
5725 output_indirect_thunk (unsigned int regno
)
5727 char indirectlabel1
[32];
5728 char indirectlabel2
[32];
5730 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1
, INDIRECT_LABEL
,
5732 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2
, INDIRECT_LABEL
,
5736 fputs ("\tcall\t", asm_out_file
);
5737 assemble_name_raw (asm_out_file
, indirectlabel2
);
5738 fputc ('\n', asm_out_file
);
5740 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel1
);
5742 /* AMD and Intel CPUs prefer each a different instruction as loop filler.
5743 Usage of both pause + lfence is compromise solution. */
5744 fprintf (asm_out_file
, "\tpause\n\tlfence\n");
5747 fputs ("\tjmp\t", asm_out_file
);
5748 assemble_name_raw (asm_out_file
, indirectlabel1
);
5749 fputc ('\n', asm_out_file
);
5751 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel2
);
5753 /* The above call insn pushed a word to stack. Adjust CFI info. */
5754 if (flag_asynchronous_unwind_tables
&& dwarf2out_do_frame ())
5756 if (! dwarf2out_do_cfi_asm ())
5758 dw_cfi_ref xcfi
= ggc_cleared_alloc
<dw_cfi_node
> ();
5759 xcfi
->dw_cfi_opc
= DW_CFA_advance_loc4
;
5760 xcfi
->dw_cfi_oprnd1
.dw_cfi_addr
= ggc_strdup (indirectlabel2
);
5761 vec_safe_push (cfun
->fde
->dw_fde_cfi
, xcfi
);
5763 dw_cfi_ref xcfi
= ggc_cleared_alloc
<dw_cfi_node
> ();
5764 xcfi
->dw_cfi_opc
= DW_CFA_def_cfa_offset
;
5765 xcfi
->dw_cfi_oprnd1
.dw_cfi_offset
= 2 * UNITS_PER_WORD
;
5766 vec_safe_push (cfun
->fde
->dw_fde_cfi
, xcfi
);
5767 dwarf2out_emit_cfi (xcfi
);
5770 if (regno
!= INVALID_REGNUM
)
5774 xops
[0] = gen_rtx_MEM (word_mode
, stack_pointer_rtx
);
5775 xops
[1] = gen_rtx_REG (word_mode
, regno
);
5776 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops
);
5782 xops
[0] = stack_pointer_rtx
;
5783 xops
[1] = plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
5784 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops
);
5787 fputs ("\tret\n", asm_out_file
);
5790 /* Output a funtion with a call and return thunk for indirect branch.
5791 If REGNO != INVALID_REGNUM, the function address is in REGNO.
5792 Otherwise, the function address is on the top of stack. Thunk is
5793 used for function return if RET_P is true. */
5796 output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix
,
5797 unsigned int regno
, bool ret_p
)
5802 /* Create __x86_indirect_thunk. */
5803 indirect_thunk_name (name
, regno
, need_prefix
, ret_p
);
5804 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
5805 get_identifier (name
),
5806 build_function_type_list (void_type_node
, NULL_TREE
));
5807 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
5808 NULL_TREE
, void_type_node
);
5809 TREE_PUBLIC (decl
) = 1;
5810 TREE_STATIC (decl
) = 1;
5811 DECL_IGNORED_P (decl
) = 1;
5816 switch_to_section (darwin_sections
[picbase_thunk_section
]);
5817 fputs ("\t.weak_definition\t", asm_out_file
);
5818 assemble_name (asm_out_file
, name
);
5819 fputs ("\n\t.private_extern\t", asm_out_file
);
5820 assemble_name (asm_out_file
, name
);
5821 putc ('\n', asm_out_file
);
5822 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5823 DECL_WEAK (decl
) = 1;
5827 if (USE_HIDDEN_LINKONCE
)
5829 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
5831 targetm
.asm_out
.unique_section (decl
, 0);
5832 switch_to_section (get_named_section (decl
, NULL
, 0));
5834 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
5835 fputs ("\t.hidden\t", asm_out_file
);
5836 assemble_name (asm_out_file
, name
);
5837 putc ('\n', asm_out_file
);
5838 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5842 switch_to_section (text_section
);
5843 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5846 DECL_INITIAL (decl
) = make_node (BLOCK
);
5847 current_function_decl
= decl
;
5848 allocate_struct_function (decl
, false);
5849 init_function_start (decl
);
5850 /* We're about to hide the function body from callees of final_* by
5851 emitting it directly; tell them we're a thunk, if they care. */
5852 cfun
->is_thunk
= true;
5853 first_function_block_is_cold
= false;
5854 /* Make sure unwind info is emitted for the thunk if needed. */
5855 final_start_function (emit_barrier (), asm_out_file
, 1);
5857 output_indirect_thunk (regno
);
5859 final_end_function ();
5860 init_insn_lengths ();
5861 free_after_compilation (cfun
);
5863 current_function_decl
= NULL
;
5866 static int pic_labels_used
;
5868 /* Fills in the label name that should be used for a pc thunk for
5869 the given register. */
5872 get_pc_thunk_name (char name
[32], unsigned int regno
)
5874 gcc_assert (!TARGET_64BIT
);
5876 if (USE_HIDDEN_LINKONCE
)
5877 sprintf (name
, "__x86.get_pc_thunk.%s", reg_names
[regno
]);
5879 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5883 /* This function generates code for -fpic that loads %ebx with
5884 the return address of the caller and then returns. */
5887 ix86_code_end (void)
5892 if (indirect_return_needed
)
5893 output_indirect_thunk_function (indirect_thunk_prefix_none
,
5894 INVALID_REGNUM
, true);
5895 if (indirect_return_via_cx
)
5896 output_indirect_thunk_function (indirect_thunk_prefix_none
,
5898 if (indirect_thunk_needed
)
5899 output_indirect_thunk_function (indirect_thunk_prefix_none
,
5900 INVALID_REGNUM
, false);
5902 for (regno
= FIRST_REX_INT_REG
; regno
<= LAST_REX_INT_REG
; regno
++)
5904 unsigned int i
= regno
- FIRST_REX_INT_REG
+ LAST_INT_REG
+ 1;
5905 if ((indirect_thunks_used
& (1 << i
)))
5906 output_indirect_thunk_function (indirect_thunk_prefix_none
,
5910 for (regno
= FIRST_INT_REG
; regno
<= LAST_INT_REG
; regno
++)
5915 if ((indirect_thunks_used
& (1 << regno
)))
5916 output_indirect_thunk_function (indirect_thunk_prefix_none
,
5919 if (!(pic_labels_used
& (1 << regno
)))
5922 get_pc_thunk_name (name
, regno
);
5924 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
5925 get_identifier (name
),
5926 build_function_type_list (void_type_node
, NULL_TREE
));
5927 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
5928 NULL_TREE
, void_type_node
);
5929 TREE_PUBLIC (decl
) = 1;
5930 TREE_STATIC (decl
) = 1;
5931 DECL_IGNORED_P (decl
) = 1;
5936 switch_to_section (darwin_sections
[picbase_thunk_section
]);
5937 fputs ("\t.weak_definition\t", asm_out_file
);
5938 assemble_name (asm_out_file
, name
);
5939 fputs ("\n\t.private_extern\t", asm_out_file
);
5940 assemble_name (asm_out_file
, name
);
5941 putc ('\n', asm_out_file
);
5942 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5943 DECL_WEAK (decl
) = 1;
5947 if (USE_HIDDEN_LINKONCE
)
5949 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
5951 targetm
.asm_out
.unique_section (decl
, 0);
5952 switch_to_section (get_named_section (decl
, NULL
, 0));
5954 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
5955 fputs ("\t.hidden\t", asm_out_file
);
5956 assemble_name (asm_out_file
, name
);
5957 putc ('\n', asm_out_file
);
5958 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5962 switch_to_section (text_section
);
5963 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5966 DECL_INITIAL (decl
) = make_node (BLOCK
);
5967 current_function_decl
= decl
;
5968 allocate_struct_function (decl
, false);
5969 init_function_start (decl
);
5970 /* We're about to hide the function body from callees of final_* by
5971 emitting it directly; tell them we're a thunk, if they care. */
5972 cfun
->is_thunk
= true;
5973 first_function_block_is_cold
= false;
5974 /* Make sure unwind info is emitted for the thunk if needed. */
5975 final_start_function (emit_barrier (), asm_out_file
, 1);
5977 /* Pad stack IP move with 4 instructions (two NOPs count
5978 as one instruction). */
5979 if (TARGET_PAD_SHORT_FUNCTION
)
5984 fputs ("\tnop\n", asm_out_file
);
5987 xops
[0] = gen_rtx_REG (Pmode
, regno
);
5988 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
5989 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
5990 output_asm_insn ("%!ret", NULL
);
5991 final_end_function ();
5992 init_insn_lengths ();
5993 free_after_compilation (cfun
);
5995 current_function_decl
= NULL
;
5998 if (flag_split_stack
)
5999 file_end_indicate_split_stack ();
6002 /* Emit code for the SET_GOT patterns. */
6005 output_set_got (rtx dest
, rtx label
)
6011 if (TARGET_VXWORKS_RTP
&& flag_pic
)
6013 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
6014 xops
[2] = gen_rtx_MEM (Pmode
,
6015 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
6016 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
6018 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6019 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6020 an unadorned address. */
6021 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
6022 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
6023 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
6027 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
6032 get_pc_thunk_name (name
, REGNO (dest
));
6033 pic_labels_used
|= 1 << REGNO (dest
);
6035 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
6036 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
6037 output_asm_insn ("%!call\t%X2", xops
);
6040 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
6041 This is what will be referenced by the Mach-O PIC subsystem. */
6042 if (machopic_should_output_picbase_label () || !label
)
6043 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
6045 /* When we are restoring the pic base at the site of a nonlocal label,
6046 and we decided to emit the pic base above, we will still output a
6047 local label used for calculating the correction offset (even though
6048 the offset will be 0 in that case). */
6050 targetm
.asm_out
.internal_label (asm_out_file
, "L",
6051 CODE_LABEL_NUMBER (label
));
6057 /* We don't need a pic base, we're not producing pic. */
6060 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
6061 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
6062 targetm
.asm_out
.internal_label (asm_out_file
, "L",
6063 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
6067 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
6072 /* Generate an "push" pattern for input ARG. */
6077 struct machine_function
*m
= cfun
->machine
;
6079 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
6080 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
6081 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
6083 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
6084 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
6086 return gen_rtx_SET (gen_rtx_MEM (word_mode
,
6087 gen_rtx_PRE_DEC (Pmode
,
6088 stack_pointer_rtx
)),
6092 /* Generate an "pop" pattern for input ARG. */
6097 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
6098 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
6100 return gen_rtx_SET (arg
,
6101 gen_rtx_MEM (word_mode
,
6102 gen_rtx_POST_INC (Pmode
,
6103 stack_pointer_rtx
)));
6106 /* Return >= 0 if there is an unused call-clobbered register available
6107 for the entire function. */
6110 ix86_select_alt_pic_regnum (void)
6112 if (ix86_use_pseudo_pic_reg ())
6113 return INVALID_REGNUM
;
6117 && !ix86_current_function_calls_tls_descriptor
)
6120 /* Can't use the same register for both PIC and DRAP. */
6122 drap
= REGNO (crtl
->drap_reg
);
6125 for (i
= 2; i
>= 0; --i
)
6126 if (i
!= drap
&& !df_regs_ever_live_p (i
))
6130 return INVALID_REGNUM
;
6133 /* Return true if REGNO is used by the epilogue. */
6136 ix86_epilogue_uses (int regno
)
6138 /* If there are no caller-saved registers, we preserve all registers,
6139 except for MMX and x87 registers which aren't supported when saving
6140 and restoring registers. Don't explicitly save SP register since
6141 it is always preserved. */
6142 return (epilogue_completed
6143 && cfun
->machine
->no_caller_saved_registers
6144 && !fixed_regs
[regno
]
6145 && !STACK_REGNO_P (regno
)
6146 && !MMX_REGNO_P (regno
));
6149 /* Return nonzero if register REGNO can be used as a scratch register
6153 ix86_hard_regno_scratch_ok (unsigned int regno
)
6155 /* If there are no caller-saved registers, we can't use any register
6156 as a scratch register after epilogue and use REGNO as scratch
6157 register only if it has been used before to avoid saving and
6159 return (!cfun
->machine
->no_caller_saved_registers
6160 || (!epilogue_completed
6161 && df_regs_ever_live_p (regno
)));
6164 /* Return TRUE if we need to save REGNO. */
6167 ix86_save_reg (unsigned int regno
, bool maybe_eh_return
, bool ignore_outlined
)
6169 /* If there are no caller-saved registers, we preserve all registers,
6170 except for MMX and x87 registers which aren't supported when saving
6171 and restoring registers. Don't explicitly save SP register since
6172 it is always preserved. */
6173 if (cfun
->machine
->no_caller_saved_registers
)
6175 /* Don't preserve registers used for function return value. */
6176 rtx reg
= crtl
->return_rtx
;
6179 unsigned int i
= REGNO (reg
);
6180 unsigned int nregs
= REG_NREGS (reg
);
6182 if ((i
+ nregs
) == regno
)
6186 return (df_regs_ever_live_p (regno
)
6187 && !fixed_regs
[regno
]
6188 && !STACK_REGNO_P (regno
)
6189 && !MMX_REGNO_P (regno
)
6190 && (regno
!= HARD_FRAME_POINTER_REGNUM
6191 || !frame_pointer_needed
));
6194 if (regno
== REAL_PIC_OFFSET_TABLE_REGNUM
6195 && pic_offset_table_rtx
)
6197 if (ix86_use_pseudo_pic_reg ())
6199 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
6200 _mcount in prologue. */
6201 if (!TARGET_64BIT
&& flag_pic
&& crtl
->profile
)
6204 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
6206 || crtl
->calls_eh_return
6207 || crtl
->uses_const_pool
6208 || cfun
->has_nonlocal_label
)
6209 return ix86_select_alt_pic_regnum () == INVALID_REGNUM
;
6212 if (crtl
->calls_eh_return
&& maybe_eh_return
)
6217 unsigned test
= EH_RETURN_DATA_REGNO (i
);
6218 if (test
== INVALID_REGNUM
)
6225 if (ignore_outlined
&& cfun
->machine
->call_ms2sysv
)
6227 unsigned count
= cfun
->machine
->call_ms2sysv_extra_regs
6228 + xlogue_layout::MIN_REGS
;
6229 if (xlogue_layout::is_stub_managed_reg (regno
, count
))
6234 && regno
== REGNO (crtl
->drap_reg
)
6235 && !cfun
->machine
->no_drap_save_restore
)
6238 return (df_regs_ever_live_p (regno
)
6239 && !call_used_or_fixed_reg_p (regno
)
6240 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
6243 /* Return number of saved general prupose registers. */
6246 ix86_nsaved_regs (void)
6251 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6252 if (GENERAL_REGNO_P (regno
) && ix86_save_reg (regno
, true, true))
6257 /* Return number of saved SSE registers. */
6260 ix86_nsaved_sseregs (void)
6265 if (!TARGET_64BIT_MS_ABI
)
6267 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6268 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true, true))
6273 /* Given FROM and TO register numbers, say whether this elimination is
6274 allowed. If stack alignment is needed, we can only replace argument
6275 pointer with hard frame pointer, or replace frame pointer with stack
6276 pointer. Otherwise, frame pointer elimination is automatically
6277 handled and all other eliminations are valid. */
6280 ix86_can_eliminate (const int from
, const int to
)
6282 if (stack_realign_fp
)
6283 return ((from
== ARG_POINTER_REGNUM
6284 && to
== HARD_FRAME_POINTER_REGNUM
)
6285 || (from
== FRAME_POINTER_REGNUM
6286 && to
== STACK_POINTER_REGNUM
));
6288 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
6291 /* Return the offset between two registers, one to be eliminated, and the other
6292 its replacement, at the start of a routine. */
6295 ix86_initial_elimination_offset (int from
, int to
)
6297 struct ix86_frame
&frame
= cfun
->machine
->frame
;
6299 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
6300 return frame
.hard_frame_pointer_offset
;
6301 else if (from
== FRAME_POINTER_REGNUM
6302 && to
== HARD_FRAME_POINTER_REGNUM
)
6303 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
6306 gcc_assert (to
== STACK_POINTER_REGNUM
);
6308 if (from
== ARG_POINTER_REGNUM
)
6309 return frame
.stack_pointer_offset
;
6311 gcc_assert (from
== FRAME_POINTER_REGNUM
);
6312 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
6316 /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
6317 void warn_once_call_ms2sysv_xlogues (const char *feature
)
6319 static bool warned_once
= false;
6322 warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6328 /* Return the probing interval for -fstack-clash-protection. */
6330 static HOST_WIDE_INT
6331 get_probe_interval (void)
6333 if (flag_stack_clash_protection
)
6334 return (HOST_WIDE_INT_1U
6335 << param_stack_clash_protection_probe_interval
);
6337 return (HOST_WIDE_INT_1U
<< STACK_CHECK_PROBE_INTERVAL_EXP
);
/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256
6346 /* Fill structure ix86_frame about frame of currently computed function. */
6349 ix86_compute_frame_layout (void)
6351 struct ix86_frame
*frame
= &cfun
->machine
->frame
;
6352 struct machine_function
*m
= cfun
->machine
;
6353 unsigned HOST_WIDE_INT stack_alignment_needed
;
6354 HOST_WIDE_INT offset
;
6355 unsigned HOST_WIDE_INT preferred_alignment
;
6356 HOST_WIDE_INT size
= ix86_get_frame_size ();
6357 HOST_WIDE_INT to_allocate
;
6359 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
6360 * ms_abi functions that call a sysv function. We now need to prune away
6361 * cases where it should be disabled. */
6362 if (TARGET_64BIT
&& m
->call_ms2sysv
)
6364 gcc_assert (TARGET_64BIT_MS_ABI
);
6365 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES
);
6366 gcc_assert (!TARGET_SEH
);
6367 gcc_assert (TARGET_SSE
);
6368 gcc_assert (!ix86_using_red_zone ());
6370 if (crtl
->calls_eh_return
)
6372 gcc_assert (!reload_completed
);
6373 m
->call_ms2sysv
= false;
6374 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
6377 else if (ix86_static_chain_on_stack
)
6379 gcc_assert (!reload_completed
);
6380 m
->call_ms2sysv
= false;
6381 warn_once_call_ms2sysv_xlogues ("static call chains");
6384 /* Finally, compute which registers the stub will manage. */
6387 unsigned count
= xlogue_layout::count_stub_managed_regs ();
6388 m
->call_ms2sysv_extra_regs
= count
- xlogue_layout::MIN_REGS
;
6389 m
->call_ms2sysv_pad_in
= 0;
6393 frame
->nregs
= ix86_nsaved_regs ();
6394 frame
->nsseregs
= ix86_nsaved_sseregs ();
6396 /* 64-bit MS ABI seem to require stack alignment to be always 16,
6397 except for function prologues, leaf functions and when the defult
6398 incoming stack boundary is overriden at command line or via
6399 force_align_arg_pointer attribute.
6401 Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
6402 at call sites, including profile function calls.
6404 if (((TARGET_64BIT_MS_ABI
|| TARGET_MACHO
)
6405 && crtl
->preferred_stack_boundary
< 128)
6406 && (!crtl
->is_leaf
|| cfun
->calls_alloca
!= 0
6407 || ix86_current_function_calls_tls_descriptor
6408 || (TARGET_MACHO
&& crtl
->profile
)
6409 || ix86_incoming_stack_boundary
< 128))
6411 crtl
->preferred_stack_boundary
= 128;
6412 crtl
->stack_alignment_needed
= 128;
6415 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
6416 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
6418 gcc_assert (!size
|| stack_alignment_needed
);
6419 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
6420 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
6422 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
6423 gcc_assert (TARGET_64BIT
|| !frame
->nsseregs
);
6424 if (TARGET_64BIT
&& m
->call_ms2sysv
)
6426 gcc_assert (stack_alignment_needed
>= 16);
6427 gcc_assert (!frame
->nsseregs
);
6430 /* For SEH we have to limit the amount of code movement into the prologue.
6431 At present we do this via a BLOCKAGE, at which point there's very little
6432 scheduling that can be done, which means that there's very little point
6433 in doing anything except PUSHs. */
6435 m
->use_fast_prologue_epilogue
= false;
6436 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun
)))
6438 int count
= frame
->nregs
;
6439 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
6441 /* The fast prologue uses move instead of push to save registers. This
6442 is significantly longer, but also executes faster as modern hardware
6443 can execute the moves in parallel, but can't do that for push/pop.
6445 Be careful about choosing what prologue to emit: When function takes
6446 many instructions to execute we may use slow version as well as in
6447 case function is known to be outside hot spot (this is known with
6448 feedback only). Weight the size of function by number of registers
6449 to save as it is cheap to use one or two push instructions but very
6450 slow to use many of them.
6452 Calling this hook multiple times with the same frame requirements
6453 must produce the same layout, since the RA might otherwise be
6454 unable to reach a fixed point or might fail its final sanity checks.
6455 This means that once we've assumed that a function does or doesn't
6456 have a particular size, we have to stick to that assumption
6457 regardless of how the function has changed since. */
6459 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
6460 if (node
->frequency
< NODE_FREQUENCY_NORMAL
6461 || (flag_branch_probabilities
6462 && node
->frequency
< NODE_FREQUENCY_HOT
))
6463 m
->use_fast_prologue_epilogue
= false;
6466 if (count
!= frame
->expensive_count
)
6468 frame
->expensive_count
= count
;
6469 frame
->expensive_p
= expensive_function_p (count
);
6471 m
->use_fast_prologue_epilogue
= !frame
->expensive_p
;
6475 frame
->save_regs_using_mov
6476 = TARGET_PROLOGUE_USING_MOVE
&& m
->use_fast_prologue_epilogue
;
6478 /* Skip return address and error code in exception handler. */
6479 offset
= INCOMING_FRAME_SP_OFFSET
;
6481 /* Skip pushed static chain. */
6482 if (ix86_static_chain_on_stack
)
6483 offset
+= UNITS_PER_WORD
;
6485 /* Skip saved base pointer. */
6486 if (frame_pointer_needed
)
6487 offset
+= UNITS_PER_WORD
;
6488 frame
->hfp_save_offset
= offset
;
6490 /* The traditional frame pointer location is at the top of the frame. */
6491 frame
->hard_frame_pointer_offset
= offset
;
6493 /* Register save area */
6494 offset
+= frame
->nregs
* UNITS_PER_WORD
;
6495 frame
->reg_save_offset
= offset
;
6497 /* On SEH target, registers are pushed just before the frame pointer
6500 frame
->hard_frame_pointer_offset
= offset
;
6502 /* Calculate the size of the va-arg area (not including padding, if any). */
6503 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
6505 /* Also adjust stack_realign_offset for the largest alignment of
6506 stack slot actually used. */
6507 if (stack_realign_fp
6508 || (cfun
->machine
->max_used_stack_alignment
!= 0
6509 && (offset
% cfun
->machine
->max_used_stack_alignment
) != 0))
6511 /* We may need a 16-byte aligned stack for the remainder of the
6512 register save area, but the stack frame for the local function
6513 may require a greater alignment if using AVX/2/512. In order
6514 to avoid wasting space, we first calculate the space needed for
6515 the rest of the register saves, add that to the stack pointer,
6516 and then realign the stack to the boundary of the start of the
6517 frame for the local function. */
6518 HOST_WIDE_INT space_needed
= 0;
6519 HOST_WIDE_INT sse_reg_space_needed
= 0;
6523 if (m
->call_ms2sysv
)
6525 m
->call_ms2sysv_pad_in
= 0;
6526 space_needed
= xlogue_layout::get_instance ().get_stack_space_used ();
6529 else if (frame
->nsseregs
)
6530 /* The only ABI that has saved SSE registers (Win64) also has a
6531 16-byte aligned default stack. However, many programs violate
6532 the ABI, and Wine64 forces stack realignment to compensate. */
6533 space_needed
= frame
->nsseregs
* 16;
6535 sse_reg_space_needed
= space_needed
= ROUND_UP (space_needed
, 16);
6537 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
6538 rounding to be pedantic. */
6539 space_needed
= ROUND_UP (space_needed
+ frame
->va_arg_size
, 16);
6542 space_needed
= frame
->va_arg_size
;
6544 /* Record the allocation size required prior to the realignment AND. */
6545 frame
->stack_realign_allocate
= space_needed
;
6547 /* The re-aligned stack starts at frame->stack_realign_offset. Values
6548 before this point are not directly comparable with values below
6549 this point. Use sp_valid_at to determine if the stack pointer is
6550 valid for a given offset, fp_valid_at for the frame pointer, or
6551 choose_baseaddr to have a base register chosen for you.
6553 Note that the result of (frame->stack_realign_offset
6554 & (stack_alignment_needed - 1)) may not equal zero. */
6555 offset
= ROUND_UP (offset
+ space_needed
, stack_alignment_needed
);
6556 frame
->stack_realign_offset
= offset
- space_needed
;
6557 frame
->sse_reg_save_offset
= frame
->stack_realign_offset
6558 + sse_reg_space_needed
;
6562 frame
->stack_realign_offset
= offset
;
6564 if (TARGET_64BIT
&& m
->call_ms2sysv
)
6566 m
->call_ms2sysv_pad_in
= !!(offset
& UNITS_PER_WORD
);
6567 offset
+= xlogue_layout::get_instance ().get_stack_space_used ();
6570 /* Align and set SSE register save area. */
6571 else if (frame
->nsseregs
)
6573 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
6574 required and the DRAP re-alignment boundary is at least 16 bytes,
6575 then we want the SSE register save area properly aligned. */
6576 if (ix86_incoming_stack_boundary
>= 128
6577 || (stack_realign_drap
&& stack_alignment_needed
>= 16))
6578 offset
= ROUND_UP (offset
, 16);
6579 offset
+= frame
->nsseregs
* 16;
6581 frame
->sse_reg_save_offset
= offset
;
6582 offset
+= frame
->va_arg_size
;
6585 /* Align start of frame for local function. When a function call
6586 is removed, it may become a leaf function. But if argument may
6587 be passed on stack, we need to align the stack when there is no
6590 || frame
->va_arg_size
!= 0
6593 || (!crtl
->tail_call_emit
6594 && cfun
->machine
->outgoing_args_on_stack
)
6595 || cfun
->calls_alloca
6596 || ix86_current_function_calls_tls_descriptor
)
6597 offset
= ROUND_UP (offset
, stack_alignment_needed
);
6599 /* Frame pointer points here. */
6600 frame
->frame_pointer_offset
= offset
;
6604 /* Add outgoing arguments area. Can be skipped if we eliminated
6605 all the function calls as dead code.
6606 Skipping is however impossible when function calls alloca. Alloca
6607 expander assumes that last crtl->outgoing_args_size
6608 of stack frame are unused. */
6609 if (ACCUMULATE_OUTGOING_ARGS
6610 && (!crtl
->is_leaf
|| cfun
->calls_alloca
6611 || ix86_current_function_calls_tls_descriptor
))
6613 offset
+= crtl
->outgoing_args_size
;
6614 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
6617 frame
->outgoing_arguments_size
= 0;
6619 /* Align stack boundary. Only needed if we're calling another function
6621 if (!crtl
->is_leaf
|| cfun
->calls_alloca
6622 || ix86_current_function_calls_tls_descriptor
)
6623 offset
= ROUND_UP (offset
, preferred_alignment
);
6625 /* We've reached end of stack frame. */
6626 frame
->stack_pointer_offset
= offset
;
6628 /* Size prologue needs to allocate. */
6629 to_allocate
= offset
- frame
->sse_reg_save_offset
;
6631 if ((!to_allocate
&& frame
->nregs
<= 1)
6632 || (TARGET_64BIT
&& to_allocate
>= HOST_WIDE_INT_C (0x80000000))
6633 /* If static stack checking is enabled and done with probes,
6634 the registers need to be saved before allocating the frame. */
6635 || flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
6636 /* If stack clash probing needs a loop, then it needs a
6637 scratch register. But the returned register is only guaranteed
6638 to be safe to use after register saves are complete. So if
6639 stack clash protections are enabled and the allocated frame is
6640 larger than the probe interval, then use pushes to save
6641 callee saved registers. */
6642 || (flag_stack_clash_protection
&& to_allocate
> get_probe_interval ()))
6643 frame
->save_regs_using_mov
= false;
6645 if (ix86_using_red_zone ()
6646 && crtl
->sp_is_unchanging
6648 && !ix86_pc_thunk_call_expanded
6649 && !ix86_current_function_calls_tls_descriptor
)
6651 frame
->red_zone_size
= to_allocate
;
6652 if (frame
->save_regs_using_mov
)
6653 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
6654 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
6655 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
6658 frame
->red_zone_size
= 0;
6659 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
6661 /* The SEH frame pointer location is near the bottom of the frame.
6662 This is enforced by the fact that the difference between the
6663 stack pointer and the frame pointer is limited to 240 bytes in
6664 the unwind data structure. */
6669 /* If we can leave the frame pointer where it is, do so. Also, returns
6670 the establisher frame for __builtin_frame_address (0). */
6671 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
6672 if (diff
<= SEH_MAX_FRAME_SIZE
6673 && (diff
> 240 || (diff
& 15) != 0)
6674 && !crtl
->accesses_prior_frames
)
6676 /* Ideally we'd determine what portion of the local stack frame
6677 (within the constraint of the lowest 240) is most heavily used.
6678 But without that complication, simply bias the frame pointer
6679 by 128 bytes so as to maximize the amount of the local stack
6680 frame that is addressable with 8-bit offsets. */
6681 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
6686 /* This is semi-inlined memory_address_length, but simplified
6687 since we know that we're always dealing with reg+offset, and
6688 to avoid having to create and discard all that rtl. */
6691 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
6697 /* EBP and R13 cannot be encoded without an offset. */
6698 len
= (regno
== BP_REG
|| regno
== R13_REG
);
6700 else if (IN_RANGE (offset
, -128, 127))
6703 /* ESP and R12 must be encoded with a SIB byte. */
6704 if (regno
== SP_REG
|| regno
== R12_REG
)
6710 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
6711 the frame save area. The register is saved at CFA - CFA_OFFSET. */
6714 sp_valid_at (HOST_WIDE_INT cfa_offset
)
6716 const struct machine_frame_state
&fs
= cfun
->machine
->fs
;
6717 if (fs
.sp_realigned
&& cfa_offset
<= fs
.sp_realigned_offset
)
6719 /* Validate that the cfa_offset isn't in a "no-man's land". */
6720 gcc_assert (cfa_offset
<= fs
.sp_realigned_fp_last
);
6726 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
6727 the frame save area. The register is saved at CFA - CFA_OFFSET. */
6730 fp_valid_at (HOST_WIDE_INT cfa_offset
)
6732 const struct machine_frame_state
&fs
= cfun
->machine
->fs
;
6733 if (fs
.sp_realigned
&& cfa_offset
> fs
.sp_realigned_fp_last
)
6735 /* Validate that the cfa_offset isn't in a "no-man's land". */
6736 gcc_assert (cfa_offset
>= fs
.sp_realigned_offset
);
6742 /* Choose a base register based upon alignment requested, speed and/or
6746 choose_basereg (HOST_WIDE_INT cfa_offset
, rtx
&base_reg
,
6747 HOST_WIDE_INT
&base_offset
,
6748 unsigned int align_reqested
, unsigned int *align
)
6750 const struct machine_function
*m
= cfun
->machine
;
6751 unsigned int hfp_align
;
6752 unsigned int drap_align
;
6753 unsigned int sp_align
;
6754 bool hfp_ok
= fp_valid_at (cfa_offset
);
6755 bool drap_ok
= m
->fs
.drap_valid
;
6756 bool sp_ok
= sp_valid_at (cfa_offset
);
6758 hfp_align
= drap_align
= sp_align
= INCOMING_STACK_BOUNDARY
;
6760 /* Filter out any registers that don't meet the requested alignment
6764 if (m
->fs
.realigned
)
6765 hfp_align
= drap_align
= sp_align
= crtl
->stack_alignment_needed
;
6766 /* SEH unwind code does do not currently support REG_CFA_EXPRESSION
6767 notes (which we would need to use a realigned stack pointer),
6768 so disable on SEH targets. */
6769 else if (m
->fs
.sp_realigned
)
6770 sp_align
= crtl
->stack_alignment_needed
;
6772 hfp_ok
= hfp_ok
&& hfp_align
>= align_reqested
;
6773 drap_ok
= drap_ok
&& drap_align
>= align_reqested
;
6774 sp_ok
= sp_ok
&& sp_align
>= align_reqested
;
6777 if (m
->use_fast_prologue_epilogue
)
6779 /* Choose the base register most likely to allow the most scheduling
6780 opportunities. Generally FP is valid throughout the function,
6781 while DRAP must be reloaded within the epilogue. But choose either
6782 over the SP due to increased encoding size. */
6786 base_reg
= hard_frame_pointer_rtx
;
6787 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
6791 base_reg
= crtl
->drap_reg
;
6792 base_offset
= 0 - cfa_offset
;
6796 base_reg
= stack_pointer_rtx
;
6797 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
6802 HOST_WIDE_INT toffset
;
6805 /* Choose the base register with the smallest address encoding.
6806 With a tie, choose FP > DRAP > SP. */
6809 base_reg
= stack_pointer_rtx
;
6810 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
6811 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
6815 toffset
= 0 - cfa_offset
;
6816 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
6819 base_reg
= crtl
->drap_reg
;
6820 base_offset
= toffset
;
6826 toffset
= m
->fs
.fp_offset
- cfa_offset
;
6827 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
6830 base_reg
= hard_frame_pointer_rtx
;
6831 base_offset
= toffset
;
6836 /* Set the align return value. */
6839 if (base_reg
== stack_pointer_rtx
)
6841 else if (base_reg
== crtl
->drap_reg
)
6842 *align
= drap_align
;
6843 else if (base_reg
== hard_frame_pointer_rtx
)
6848 /* Return an RTX that points to CFA_OFFSET within the stack frame and
6849 the alignment of address. If ALIGN is non-null, it should point to
6850 an alignment value (in bits) that is preferred or zero and will
6851 recieve the alignment of the base register that was selected,
6852 irrespective of rather or not CFA_OFFSET is a multiple of that
6853 alignment value. If it is possible for the base register offset to be
6854 non-immediate then SCRATCH_REGNO should specify a scratch register to
6857 The valid base registers are taken from CFUN->MACHINE->FS. */
6860 choose_baseaddr (HOST_WIDE_INT cfa_offset
, unsigned int *align
,
6861 unsigned int scratch_regno
= INVALID_REGNUM
)
6863 rtx base_reg
= NULL
;
6864 HOST_WIDE_INT base_offset
= 0;
6866 /* If a specific alignment is requested, try to get a base register
6867 with that alignment first. */
6868 if (align
&& *align
)
6869 choose_basereg (cfa_offset
, base_reg
, base_offset
, *align
, align
);
6872 choose_basereg (cfa_offset
, base_reg
, base_offset
, 0, align
);
6874 gcc_assert (base_reg
!= NULL
);
6876 rtx base_offset_rtx
= GEN_INT (base_offset
);
6878 if (!x86_64_immediate_operand (base_offset_rtx
, Pmode
))
6880 gcc_assert (scratch_regno
!= INVALID_REGNUM
);
6882 rtx scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
6883 emit_move_insn (scratch_reg
, base_offset_rtx
);
6885 return gen_rtx_PLUS (Pmode
, base_reg
, scratch_reg
);
6888 return plus_constant (Pmode
, base_reg
, base_offset
);
6891 /* Emit code to save registers in the prologue. */
6894 ix86_emit_save_regs (void)
6899 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
6900 if (GENERAL_REGNO_P (regno
) && ix86_save_reg (regno
, true, true))
6902 insn
= emit_insn (gen_push (gen_rtx_REG (word_mode
, regno
)));
6903 RTX_FRAME_RELATED_P (insn
) = 1;
6907 /* Emit a single register save at CFA - CFA_OFFSET. */
6910 ix86_emit_save_reg_using_mov (machine_mode mode
, unsigned int regno
,
6911 HOST_WIDE_INT cfa_offset
)
6913 struct machine_function
*m
= cfun
->machine
;
6914 rtx reg
= gen_rtx_REG (mode
, regno
);
6915 rtx mem
, addr
, base
, insn
;
6916 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
6918 addr
= choose_baseaddr (cfa_offset
, &align
);
6919 mem
= gen_frame_mem (mode
, addr
);
6921 /* The location aligment depends upon the base register. */
6922 align
= MIN (GET_MODE_ALIGNMENT (mode
), align
);
6923 gcc_assert (! (cfa_offset
& (align
/ BITS_PER_UNIT
- 1)));
6924 set_mem_align (mem
, align
);
6926 insn
= emit_insn (gen_rtx_SET (mem
, reg
));
6927 RTX_FRAME_RELATED_P (insn
) = 1;
6930 if (GET_CODE (base
) == PLUS
)
6931 base
= XEXP (base
, 0);
6932 gcc_checking_assert (REG_P (base
));
6934 /* When saving registers into a re-aligned local stack frame, avoid
6935 any tricky guessing by dwarf2out. */
6936 if (m
->fs
.realigned
)
6938 gcc_checking_assert (stack_realign_drap
);
6940 if (regno
== REGNO (crtl
->drap_reg
))
6942 /* A bit of a hack. We force the DRAP register to be saved in
6943 the re-aligned stack frame, which provides us with a copy
6944 of the CFA that will last past the prologue. Install it. */
6945 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
6946 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
6947 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
6948 mem
= gen_rtx_MEM (mode
, addr
);
6949 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
6953 /* The frame pointer is a stable reference within the
6954 aligned frame. Use it. */
6955 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
6956 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
6957 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
6958 mem
= gen_rtx_MEM (mode
, addr
);
6959 add_reg_note (insn
, REG_CFA_EXPRESSION
, gen_rtx_SET (mem
, reg
));
6963 else if (base
== stack_pointer_rtx
&& m
->fs
.sp_realigned
6964 && cfa_offset
>= m
->fs
.sp_realigned_offset
)
6966 gcc_checking_assert (stack_realign_fp
);
6967 add_reg_note (insn
, REG_CFA_EXPRESSION
, gen_rtx_SET (mem
, reg
));
6970 /* The memory may not be relative to the current CFA register,
6971 which means that we may need to generate a new pattern for
6972 use by the unwind info. */
6973 else if (base
!= m
->fs
.cfa_reg
)
6975 addr
= plus_constant (Pmode
, m
->fs
.cfa_reg
,
6976 m
->fs
.cfa_offset
- cfa_offset
);
6977 mem
= gen_rtx_MEM (mode
, addr
);
6978 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (mem
, reg
));
6982 /* Emit code to save registers using MOV insns.
6983 First register is stored at CFA - CFA_OFFSET. */
6985 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
6989 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6990 if (GENERAL_REGNO_P (regno
) && ix86_save_reg (regno
, true, true))
6992 ix86_emit_save_reg_using_mov (word_mode
, regno
, cfa_offset
);
6993 cfa_offset
-= UNITS_PER_WORD
;
6997 /* Emit code to save SSE registers using MOV insns.
6998 First register is stored at CFA - CFA_OFFSET. */
7000 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
7004 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
7005 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true, true))
7007 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
7008 cfa_offset
-= GET_MODE_SIZE (V4SFmode
);
7012 static GTY(()) rtx queued_cfa_restores
;
7014 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
7015 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
7016 Don't add the note if the previously saved value will be left untouched
7017 within stack red-zone till return, as unwinders can find the same value
7018 in the register and on the stack. */
7021 ix86_add_cfa_restore_note (rtx_insn
*insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
7023 if (!crtl
->shrink_wrapped
7024 && cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
7029 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
7030 RTX_FRAME_RELATED_P (insn
) = 1;
7034 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
7037 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
7040 ix86_add_queued_cfa_restore_notes (rtx insn
)
7043 if (!queued_cfa_restores
)
7045 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
7047 XEXP (last
, 1) = REG_NOTES (insn
);
7048 REG_NOTES (insn
) = queued_cfa_restores
;
7049 queued_cfa_restores
= NULL_RTX
;
7050 RTX_FRAME_RELATED_P (insn
) = 1;
7053 /* Expand prologue or epilogue stack adjustment.
7054 The pattern exist to put a dependency on all ebp-based memory accesses.
7055 STYLE should be negative if instructions should be marked as frame related,
7056 zero if %r11 register is live and cannot be freely used and positive
7060 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
7061 int style
, bool set_cfa
)
7063 struct machine_function
*m
= cfun
->machine
;
7064 rtx addend
= offset
;
7066 bool add_frame_related_expr
= false;
7068 if (!x86_64_immediate_operand (offset
, Pmode
))
7070 /* r11 is used by indirect sibcall return as well, set before the
7071 epilogue and used after the epilogue. */
7073 addend
= gen_rtx_REG (Pmode
, R11_REG
);
7076 gcc_assert (src
!= hard_frame_pointer_rtx
7077 && dest
!= hard_frame_pointer_rtx
);
7078 addend
= hard_frame_pointer_rtx
;
7080 emit_insn (gen_rtx_SET (addend
, offset
));
7082 add_frame_related_expr
= true;
7085 insn
= emit_insn (gen_pro_epilogue_adjust_stack_add
7086 (Pmode
, dest
, src
, addend
));
7088 ix86_add_queued_cfa_restore_notes (insn
);
7094 gcc_assert (m
->fs
.cfa_reg
== src
);
7095 m
->fs
.cfa_offset
+= INTVAL (offset
);
7096 m
->fs
.cfa_reg
= dest
;
7098 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
7099 r
= gen_rtx_SET (dest
, r
);
7100 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
7101 RTX_FRAME_RELATED_P (insn
) = 1;
7105 RTX_FRAME_RELATED_P (insn
) = 1;
7106 if (add_frame_related_expr
)
7108 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
7109 r
= gen_rtx_SET (dest
, r
);
7110 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
7114 if (dest
== stack_pointer_rtx
)
7116 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
7117 bool valid
= m
->fs
.sp_valid
;
7118 bool realigned
= m
->fs
.sp_realigned
;
7120 if (src
== hard_frame_pointer_rtx
)
7122 valid
= m
->fs
.fp_valid
;
7124 ooffset
= m
->fs
.fp_offset
;
7126 else if (src
== crtl
->drap_reg
)
7128 valid
= m
->fs
.drap_valid
;
7134 /* Else there are two possibilities: SP itself, which we set
7135 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
7136 taken care of this by hand along the eh_return path. */
7137 gcc_checking_assert (src
== stack_pointer_rtx
7138 || offset
== const0_rtx
);
7141 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
7142 m
->fs
.sp_valid
= valid
;
7143 m
->fs
.sp_realigned
= realigned
;
7148 /* Find an available register to be used as dynamic realign argument
7149 pointer regsiter. Such a register will be written in prologue and
7150 used in begin of body, so it must not be
7151 1. parameter passing register.
7153 We reuse static-chain register if it is available. Otherwise, we
7154 use DI for i386 and R13 for x86-64. We chose R13 since it has
7157 Return: the regno of chosen register. */
7160 find_drap_reg (void)
7162 tree decl
= cfun
->decl
;
7164 /* Always use callee-saved register if there are no caller-saved
7168 /* Use R13 for nested function or function need static chain.
7169 Since function with tail call may use any caller-saved
7170 registers in epilogue, DRAP must not use caller-saved
7171 register in such case. */
7172 if (DECL_STATIC_CHAIN (decl
)
7173 || cfun
->machine
->no_caller_saved_registers
7174 || crtl
->tail_call_emit
)
7181 /* Use DI for nested function or function need static chain.
7182 Since function with tail call may use any caller-saved
7183 registers in epilogue, DRAP must not use caller-saved
7184 register in such case. */
7185 if (DECL_STATIC_CHAIN (decl
)
7186 || cfun
->machine
->no_caller_saved_registers
7187 || crtl
->tail_call_emit
)
7190 /* Reuse static chain register if it isn't used for parameter
7192 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
7194 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
7195 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
7202 /* Return minimum incoming stack alignment. */
7205 ix86_minimum_incoming_stack_boundary (bool sibcall
)
7207 unsigned int incoming_stack_boundary
;
7209 /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
7210 if (cfun
->machine
->func_type
!= TYPE_NORMAL
)
7211 incoming_stack_boundary
= TARGET_64BIT
? 128 : MIN_STACK_BOUNDARY
;
7212 /* Prefer the one specified at command line. */
7213 else if (ix86_user_incoming_stack_boundary
)
7214 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
7215 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
7216 if -mstackrealign is used, it isn't used for sibcall check and
7217 estimated stack alignment is 128bit. */
7219 && ix86_force_align_arg_pointer
7220 && crtl
->stack_alignment_estimated
== 128)
7221 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
7223 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
7225 /* Incoming stack alignment can be changed on individual functions
7226 via force_align_arg_pointer attribute. We use the smallest
7227 incoming stack boundary. */
7228 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
7229 && lookup_attribute ("force_align_arg_pointer",
7230 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
7231 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
7233 /* The incoming stack frame has to be aligned at least at
7234 parm_stack_boundary. */
7235 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
7236 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
7238 /* Stack at entrance of main is aligned by runtime. We use the
7239 smallest incoming stack boundary. */
7240 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
7241 && DECL_NAME (current_function_decl
)
7242 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
7243 && DECL_FILE_SCOPE_P (current_function_decl
))
7244 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
7246 return incoming_stack_boundary
;
7249 /* Update incoming stack boundary and estimated stack alignment. */
7252 ix86_update_stack_boundary (void)
7254 ix86_incoming_stack_boundary
7255 = ix86_minimum_incoming_stack_boundary (false);
7257 /* x86_64 vararg needs 16byte stack alignment for register save area. */
7260 && crtl
->stack_alignment_estimated
< 128)
7261 crtl
->stack_alignment_estimated
= 128;
7263 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
7264 if (ix86_tls_descriptor_calls_expanded_in_cfun
7265 && crtl
->preferred_stack_boundary
< 128)
7266 crtl
->preferred_stack_boundary
= 128;
7269 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7270 needed or an rtx for DRAP otherwise. */
7273 ix86_get_drap_rtx (void)
7275 /* We must use DRAP if there are outgoing arguments on stack or
7276 the stack pointer register is clobbered by asm statment and
7277 ACCUMULATE_OUTGOING_ARGS is false. */
7279 || ((cfun
->machine
->outgoing_args_on_stack
7280 || crtl
->sp_is_clobbered_by_asm
)
7281 && !ACCUMULATE_OUTGOING_ARGS
))
7282 crtl
->need_drap
= true;
7284 if (stack_realign_drap
)
7286 /* Assign DRAP to vDRAP and returns vDRAP */
7287 unsigned int regno
= find_drap_reg ();
7290 rtx_insn
*seq
, *insn
;
7292 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
7293 crtl
->drap_reg
= arg_ptr
;
7296 drap_vreg
= copy_to_reg (arg_ptr
);
7300 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
7303 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
7304 RTX_FRAME_RELATED_P (insn
) = 1;
7312 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7315 ix86_internal_arg_pointer (void)
7317 return virtual_incoming_args_rtx
;
7320 struct scratch_reg
{
7325 /* Return a short-lived scratch register for use on function entry.
7326 In 32-bit mode, it is valid only after the registers are saved
7327 in the prologue. This register must be released by means of
7328 release_scratch_register_on_entry once it is dead. */
7331 get_scratch_register_on_entry (struct scratch_reg
*sr
)
7339 /* We always use R11 in 64-bit mode. */
7344 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
7346 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
7348 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
7349 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
7350 int regparm
= ix86_function_regparm (fntype
, decl
);
7352 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
7354 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
7355 for the static chain register. */
7356 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
7357 && drap_regno
!= AX_REG
)
7359 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
7360 for the static chain register. */
7361 else if (thiscall_p
&& !static_chain_p
&& drap_regno
!= AX_REG
)
7363 else if (regparm
< 2 && !thiscall_p
&& drap_regno
!= DX_REG
)
7365 /* ecx is the static chain register. */
7366 else if (regparm
< 3 && !fastcall_p
&& !thiscall_p
7368 && drap_regno
!= CX_REG
)
7370 else if (ix86_save_reg (BX_REG
, true, false))
7372 /* esi is the static chain register. */
7373 else if (!(regparm
== 3 && static_chain_p
)
7374 && ix86_save_reg (SI_REG
, true, false))
7376 else if (ix86_save_reg (DI_REG
, true, false))
7380 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
7385 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
7388 rtx_insn
*insn
= emit_insn (gen_push (sr
->reg
));
7389 RTX_FRAME_RELATED_P (insn
) = 1;
7393 /* Release a scratch register obtained from the preceding function.
7395 If RELEASE_VIA_POP is true, we just pop the register off the stack
7396 to release it. This is what non-Linux systems use with -fstack-check.
7398 Otherwise we use OFFSET to locate the saved register and the
7399 allocated stack space becomes part of the local frame and is
7400 deallocated by the epilogue. */
7403 release_scratch_register_on_entry (struct scratch_reg
*sr
, HOST_WIDE_INT offset
,
7404 bool release_via_pop
)
7408 if (release_via_pop
)
7410 struct machine_function
*m
= cfun
->machine
;
7411 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
7413 /* The RX FRAME_RELATED_P mechanism doesn't know about pop. */
7414 RTX_FRAME_RELATED_P (insn
) = 1;
7415 x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
7416 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
7417 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
7418 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
7422 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
7423 x
= gen_rtx_SET (sr
->reg
, gen_rtx_MEM (word_mode
, x
));
7429 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
7431 If INT_REGISTERS_SAVED is true, then integer registers have already been
7432 pushed on the stack.
7434 If PROTECTION AREA is true, then probe PROBE_INTERVAL plus a small dope
7437 This assumes no knowledge of the current probing state, i.e. it is never
7438 allowed to allocate more than PROBE_INTERVAL bytes of stack space without
7439 a suitable probe. */
7442 ix86_adjust_stack_and_probe (HOST_WIDE_INT size
,
7443 const bool int_registers_saved
,
7444 const bool protection_area
)
7446 struct machine_function
*m
= cfun
->machine
;
7448 /* If this function does not statically allocate stack space, then
7449 no probes are needed. */
7452 /* However, the allocation of space via pushes for register
7453 saves could be viewed as allocating space, but without the
7455 if (m
->frame
.nregs
|| m
->frame
.nsseregs
|| frame_pointer_needed
)
7456 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME
, true);
7458 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME
, false);
7462 /* If we are a noreturn function, then we have to consider the
7463 possibility that we're called via a jump rather than a call.
7465 Thus we don't have the implicit probe generated by saving the
7466 return address into the stack at the call. Thus, the stack
7467 pointer could be anywhere in the guard page. The safe thing
7468 to do is emit a probe now.
7470 The probe can be avoided if we have already emitted any callee
7471 register saves into the stack or have a frame pointer (which will
7472 have been saved as well). Those saves will function as implicit
7475 ?!? This should be revamped to work like aarch64 and s390 where
7476 we track the offset from the most recent probe. Normally that
7477 offset would be zero. For a noreturn function we would reset
7478 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
7479 we just probe when we cross PROBE_INTERVAL. */
7480 if (TREE_THIS_VOLATILE (cfun
->decl
)
7481 && !(m
->frame
.nregs
|| m
->frame
.nsseregs
|| frame_pointer_needed
))
7483 /* We can safely use any register here since we're just going to push
7484 its value and immediately pop it back. But we do try and avoid
7485 argument passing registers so as not to introduce dependencies in
7486 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
7487 rtx dummy_reg
= gen_rtx_REG (word_mode
, TARGET_64BIT
? AX_REG
: SI_REG
);
7488 rtx_insn
*insn_push
= emit_insn (gen_push (dummy_reg
));
7489 rtx_insn
*insn_pop
= emit_insn (gen_pop (dummy_reg
));
7490 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
7491 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
7493 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
7494 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
7495 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
7496 add_reg_note (insn_push
, REG_CFA_ADJUST_CFA
, x
);
7497 RTX_FRAME_RELATED_P (insn_push
) = 1;
7498 x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
7499 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
7500 add_reg_note (insn_pop
, REG_CFA_ADJUST_CFA
, x
);
7501 RTX_FRAME_RELATED_P (insn_pop
) = 1;
7503 emit_insn (gen_blockage ());
7506 const HOST_WIDE_INT probe_interval
= get_probe_interval ();
7507 const int dope
= 4 * UNITS_PER_WORD
;
7509 /* If there is protection area, take it into account in the size. */
7510 if (protection_area
)
7511 size
+= probe_interval
+ dope
;
7513 /* If we allocate less than the size of the guard statically,
7514 then no probing is necessary, but we do need to allocate
7516 else if (size
< (1 << param_stack_clash_protection_guard_size
))
7518 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
7519 GEN_INT (-size
), -1,
7520 m
->fs
.cfa_reg
== stack_pointer_rtx
);
7521 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME
, true);
7525 /* We're allocating a large enough stack frame that we need to
7526 emit probes. Either emit them inline or in a loop depending
7528 if (size
<= 4 * probe_interval
)
7531 for (i
= probe_interval
; i
<= size
; i
+= probe_interval
)
7533 /* Allocate PROBE_INTERVAL bytes. */
7535 = pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
7536 GEN_INT (-probe_interval
), -1,
7537 m
->fs
.cfa_reg
== stack_pointer_rtx
);
7538 add_reg_note (insn
, REG_STACK_CHECK
, const0_rtx
);
7540 /* And probe at *sp. */
7541 emit_stack_probe (stack_pointer_rtx
);
7542 emit_insn (gen_blockage ());
7545 /* We need to allocate space for the residual, but we do not need
7546 to probe the residual... */
7547 HOST_WIDE_INT residual
= (i
- probe_interval
- size
);
7550 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
7551 GEN_INT (residual
), -1,
7552 m
->fs
.cfa_reg
== stack_pointer_rtx
);
7554 /* ...except if there is a protection area to maintain. */
7555 if (protection_area
)
7556 emit_stack_probe (stack_pointer_rtx
);
7559 dump_stack_clash_frame_info (PROBE_INLINE
, residual
!= 0);
7563 /* We expect the GP registers to be saved when probes are used
7564 as the probing sequences might need a scratch register and
7565 the routine to allocate one assumes the integer registers
7566 have already been saved. */
7567 gcc_assert (int_registers_saved
);
7569 struct scratch_reg sr
;
7570 get_scratch_register_on_entry (&sr
);
7572 /* If we needed to save a register, then account for any space
7573 that was pushed (we are not going to pop the register when
7574 we do the restore). */
7576 size
-= UNITS_PER_WORD
;
7578 /* Step 1: round SIZE down to a multiple of the interval. */
7579 HOST_WIDE_INT rounded_size
= size
& -probe_interval
;
7581 /* Step 2: compute final value of the loop counter. Use lea if
7583 rtx addr
= plus_constant (Pmode
, stack_pointer_rtx
, -rounded_size
);
7585 if (address_no_seg_operand (addr
, Pmode
))
7586 insn
= emit_insn (gen_rtx_SET (sr
.reg
, addr
));
7589 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
7590 insn
= emit_insn (gen_rtx_SET (sr
.reg
,
7591 gen_rtx_PLUS (Pmode
, sr
.reg
,
7592 stack_pointer_rtx
)));
7594 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
7596 add_reg_note (insn
, REG_CFA_DEF_CFA
,
7597 plus_constant (Pmode
, sr
.reg
,
7598 m
->fs
.cfa_offset
+ rounded_size
));
7599 RTX_FRAME_RELATED_P (insn
) = 1;
7602 /* Step 3: the loop. */
7603 rtx size_rtx
= GEN_INT (rounded_size
);
7604 insn
= emit_insn (gen_adjust_stack_and_probe (Pmode
, sr
.reg
, sr
.reg
,
7606 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
7608 m
->fs
.cfa_offset
+= rounded_size
;
7609 add_reg_note (insn
, REG_CFA_DEF_CFA
,
7610 plus_constant (Pmode
, stack_pointer_rtx
,
7612 RTX_FRAME_RELATED_P (insn
) = 1;
7614 m
->fs
.sp_offset
+= rounded_size
;
7615 emit_insn (gen_blockage ());
7617 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
7618 is equal to ROUNDED_SIZE. */
7620 if (size
!= rounded_size
)
7622 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
7623 GEN_INT (rounded_size
- size
), -1,
7624 m
->fs
.cfa_reg
== stack_pointer_rtx
);
7626 if (protection_area
)
7627 emit_stack_probe (stack_pointer_rtx
);
7630 dump_stack_clash_frame_info (PROBE_LOOP
, size
!= rounded_size
);
7632 /* This does not deallocate the space reserved for the scratch
7633 register. That will be deallocated in the epilogue. */
7634 release_scratch_register_on_entry (&sr
, size
, false);
7637 /* Adjust back to account for the protection area. */
7638 if (protection_area
)
7639 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
7640 GEN_INT (probe_interval
+ dope
), -1,
7641 m
->fs
.cfa_reg
== stack_pointer_rtx
);
7643 /* Make sure nothing is scheduled before we are done. */
7644 emit_insn (gen_blockage ());
7647 /* Adjust the stack pointer up to REG while probing it. */
7650 output_adjust_stack_and_probe (rtx reg
)
7652 static int labelno
= 0;
7656 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
7659 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
7661 /* SP = SP + PROBE_INTERVAL. */
7662 xops
[0] = stack_pointer_rtx
;
7663 xops
[1] = GEN_INT (get_probe_interval ());
7664 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
7667 xops
[1] = const0_rtx
;
7668 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
7670 /* Test if SP == LAST_ADDR. */
7671 xops
[0] = stack_pointer_rtx
;
7673 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
7676 fputs ("\tjne\t", asm_out_file
);
7677 assemble_name_raw (asm_out_file
, loop_lab
);
7678 fputc ('\n', asm_out_file
);
7683 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
7684 inclusive. These are offsets from the current stack pointer.
7686 INT_REGISTERS_SAVED is true if integer registers have already been
7687 pushed on the stack. */
7690 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
7691 const bool int_registers_saved
)
7693 const HOST_WIDE_INT probe_interval
= get_probe_interval ();
7695 /* See if we have a constant small number of probes to generate. If so,
7696 that's the easy case. The run-time loop is made up of 6 insns in the
7697 generic case while the compile-time loop is made up of n insns for n #
7699 if (size
<= 6 * probe_interval
)
7703 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
7704 it exceeds SIZE. If only one probe is needed, this will not
7705 generate any code. Then probe at FIRST + SIZE. */
7706 for (i
= probe_interval
; i
< size
; i
+= probe_interval
)
7707 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
7710 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
7714 /* Otherwise, do the same as above, but in a loop. Note that we must be
7715 extra careful with variables wrapping around because we might be at
7716 the very top (or the very bottom) of the address space and we have
7717 to be able to handle this case properly; in particular, we use an
7718 equality test for the loop condition. */
7721 /* We expect the GP registers to be saved when probes are used
7722 as the probing sequences might need a scratch register and
7723 the routine to allocate one assumes the integer registers
7724 have already been saved. */
7725 gcc_assert (int_registers_saved
);
7727 HOST_WIDE_INT rounded_size
, last
;
7728 struct scratch_reg sr
;
7730 get_scratch_register_on_entry (&sr
);
7733 /* Step 1: round SIZE to the previous multiple of the interval. */
7735 rounded_size
= ROUND_DOWN (size
, probe_interval
);
7738 /* Step 2: compute initial and final value of the loop counter. */
7740 /* TEST_OFFSET = FIRST. */
7741 emit_move_insn (sr
.reg
, GEN_INT (-first
));
7743 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
7744 last
= first
+ rounded_size
;
7751 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
7754 while (TEST_ADDR != LAST_ADDR)
7756 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
7757 until it is equal to ROUNDED_SIZE. */
7760 (gen_probe_stack_range (Pmode
, sr
.reg
, sr
.reg
, GEN_INT (-last
)));
7763 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
7764 that SIZE is equal to ROUNDED_SIZE. */
7766 if (size
!= rounded_size
)
7767 emit_stack_probe (plus_constant (Pmode
,
7768 gen_rtx_PLUS (Pmode
,
7771 rounded_size
- size
));
7773 release_scratch_register_on_entry (&sr
, size
, true);
7776 /* Make sure nothing is scheduled before we are done. */
7777 emit_insn (gen_blockage ());
7780 /* Probe a range of stack addresses from REG to END, inclusive. These are
7781 offsets from the current stack pointer. */
7784 output_probe_stack_range (rtx reg
, rtx end
)
7786 static int labelno
= 0;
7790 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
7793 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
7795 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
7797 xops
[1] = GEN_INT (get_probe_interval ());
7798 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
7800 /* Probe at TEST_ADDR. */
7801 xops
[0] = stack_pointer_rtx
;
7803 xops
[2] = const0_rtx
;
7804 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
7806 /* Test if TEST_ADDR == LAST_ADDR. */
7809 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
7812 fputs ("\tjne\t", asm_out_file
);
7813 assemble_name_raw (asm_out_file
, loop_lab
);
7814 fputc ('\n', asm_out_file
);
7819 /* Set stack_frame_required to false if stack frame isn't required.
7820 Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
7821 slot used if stack frame is required and CHECK_STACK_SLOT is true. */
7824 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment
,
7825 bool check_stack_slot
)
7827 HARD_REG_SET set_up_by_prologue
, prologue_used
;
7830 CLEAR_HARD_REG_SET (prologue_used
);
7831 CLEAR_HARD_REG_SET (set_up_by_prologue
);
7832 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
7833 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
7834 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
7835 HARD_FRAME_POINTER_REGNUM
);
7837 /* The preferred stack alignment is the minimum stack alignment. */
7838 if (stack_alignment
> crtl
->preferred_stack_boundary
)
7839 stack_alignment
= crtl
->preferred_stack_boundary
;
7841 bool require_stack_frame
= false;
7843 FOR_EACH_BB_FN (bb
, cfun
)
7846 FOR_BB_INSNS (bb
, insn
)
7847 if (NONDEBUG_INSN_P (insn
)
7848 && requires_stack_frame_p (insn
, prologue_used
,
7849 set_up_by_prologue
))
7851 require_stack_frame
= true;
7853 if (check_stack_slot
)
7855 /* Find the maximum stack alignment. */
7856 subrtx_iterator::array_type array
;
7857 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
7859 && (reg_mentioned_p (stack_pointer_rtx
,
7861 || reg_mentioned_p (frame_pointer_rtx
,
7864 unsigned int alignment
= MEM_ALIGN (*iter
);
7865 if (alignment
> stack_alignment
)
7866 stack_alignment
= alignment
;
7872 cfun
->machine
->stack_frame_required
= require_stack_frame
;
7875 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
7876 will guide prologue/epilogue to be generated in correct form. */
7879 ix86_finalize_stack_frame_flags (void)
7881 /* Check if stack realign is really needed after reload, and
7882 stores result in cfun */
7883 unsigned int incoming_stack_boundary
7884 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
7885 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
7886 unsigned int stack_alignment
7887 = (crtl
->is_leaf
&& !ix86_current_function_calls_tls_descriptor
7888 ? crtl
->max_used_stack_slot_alignment
7889 : crtl
->stack_alignment_needed
);
7890 unsigned int stack_realign
7891 = (incoming_stack_boundary
< stack_alignment
);
7892 bool recompute_frame_layout_p
= false;
7894 if (crtl
->stack_realign_finalized
)
7896 /* After stack_realign_needed is finalized, we can't no longer
7898 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
7902 /* It is always safe to compute max_used_stack_alignment. We
7903 compute it only if 128-bit aligned load/store may be generated
7904 on misaligned stack slot which will lead to segfault. */
7905 bool check_stack_slot
7906 = (stack_realign
|| crtl
->max_used_stack_slot_alignment
>= 128);
7907 ix86_find_max_used_stack_alignment (stack_alignment
,
7910 /* If the only reason for frame_pointer_needed is that we conservatively
7911 assumed stack realignment might be needed or -fno-omit-frame-pointer
7912 is used, but in the end nothing that needed the stack alignment had
7913 been spilled nor stack access, clear frame_pointer_needed and say we
7914 don't need stack realignment. */
7915 if ((stack_realign
|| (!flag_omit_frame_pointer
&& optimize
))
7916 && frame_pointer_needed
7918 && crtl
->sp_is_unchanging
7919 && !ix86_current_function_calls_tls_descriptor
7920 && !crtl
->accesses_prior_frames
7921 && !cfun
->calls_alloca
7922 && !crtl
->calls_eh_return
7923 /* See ira_setup_eliminable_regset for the rationale. */
7924 && !(STACK_CHECK_MOVING_SP
7927 && cfun
->can_throw_non_call_exceptions
)
7928 && !ix86_frame_pointer_required ()
7929 && ix86_get_frame_size () == 0
7930 && ix86_nsaved_sseregs () == 0
7931 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
7933 if (cfun
->machine
->stack_frame_required
)
7935 /* Stack frame is required. If stack alignment needed is less
7936 than incoming stack boundary, don't realign stack. */
7937 stack_realign
= incoming_stack_boundary
< stack_alignment
;
7940 crtl
->max_used_stack_slot_alignment
7941 = incoming_stack_boundary
;
7942 crtl
->stack_alignment_needed
7943 = incoming_stack_boundary
;
7944 /* Also update preferred_stack_boundary for leaf
7946 crtl
->preferred_stack_boundary
7947 = incoming_stack_boundary
;
7952 /* If drap has been set, but it actually isn't live at the
7953 start of the function, there is no reason to set it up. */
7956 basic_block bb
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
;
7957 if (! REGNO_REG_SET_P (DF_LR_IN (bb
),
7958 REGNO (crtl
->drap_reg
)))
7960 crtl
->drap_reg
= NULL_RTX
;
7961 crtl
->need_drap
= false;
7965 cfun
->machine
->no_drap_save_restore
= true;
7967 frame_pointer_needed
= false;
7968 stack_realign
= false;
7969 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
7970 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
7971 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
7972 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
7973 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
7974 df_finish_pass (true);
7975 df_scan_alloc (NULL
);
7977 df_compute_regs_ever_live (true);
7980 if (flag_var_tracking
)
7982 /* Since frame pointer is no longer available, replace it with
7983 stack pointer - UNITS_PER_WORD in debug insns. */
7985 for (ref
= DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM
);
7988 next
= DF_REF_NEXT_REG (ref
);
7989 if (!DF_REF_INSN_INFO (ref
))
7992 /* Make sure the next ref is for a different instruction,
7993 so that we're not affected by the rescan. */
7994 rtx_insn
*insn
= DF_REF_INSN (ref
);
7995 while (next
&& DF_REF_INSN (next
) == insn
)
7996 next
= DF_REF_NEXT_REG (next
);
7998 if (DEBUG_INSN_P (insn
))
8000 bool changed
= false;
8001 for (; ref
!= next
; ref
= DF_REF_NEXT_REG (ref
))
8003 rtx
*loc
= DF_REF_LOC (ref
);
8004 if (*loc
== hard_frame_pointer_rtx
)
8006 *loc
= plus_constant (Pmode
,
8013 df_insn_rescan (insn
);
8018 recompute_frame_layout_p
= true;
8021 else if (crtl
->max_used_stack_slot_alignment
>= 128
8022 && cfun
->machine
->stack_frame_required
)
8024 /* We don't need to realign stack. max_used_stack_alignment is
8025 used to decide how stack frame should be aligned. This is
8026 independent of any psABIs nor 32-bit vs 64-bit. */
8027 cfun
->machine
->max_used_stack_alignment
8028 = stack_alignment
/ BITS_PER_UNIT
;
8031 if (crtl
->stack_realign_needed
!= stack_realign
)
8032 recompute_frame_layout_p
= true;
8033 crtl
->stack_realign_needed
= stack_realign
;
8034 crtl
->stack_realign_finalized
= true;
8035 if (recompute_frame_layout_p
)
8036 ix86_compute_frame_layout ();
8039 /* Delete SET_GOT right after entry block if it is allocated to reg. */
8042 ix86_elim_entry_set_got (rtx reg
)
8044 basic_block bb
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
;
8045 rtx_insn
*c_insn
= BB_HEAD (bb
);
8046 if (!NONDEBUG_INSN_P (c_insn
))
8047 c_insn
= next_nonnote_nondebug_insn (c_insn
);
8048 if (c_insn
&& NONJUMP_INSN_P (c_insn
))
8050 rtx pat
= PATTERN (c_insn
);
8051 if (GET_CODE (pat
) == PARALLEL
)
8053 rtx vec
= XVECEXP (pat
, 0, 0);
8054 if (GET_CODE (vec
) == SET
8055 && XINT (XEXP (vec
, 1), 1) == UNSPEC_SET_GOT
8056 && REGNO (XEXP (vec
, 0)) == REGNO (reg
))
8057 delete_insn (c_insn
);
8063 gen_frame_set (rtx reg
, rtx frame_reg
, int offset
, bool store
)
8068 addr
= plus_constant (Pmode
, frame_reg
, offset
);
8069 mem
= gen_frame_mem (GET_MODE (reg
), offset
? addr
: frame_reg
);
8070 return gen_rtx_SET (store
? mem
: reg
, store
? reg
: mem
);
8074 gen_frame_load (rtx reg
, rtx frame_reg
, int offset
)
8076 return gen_frame_set (reg
, frame_reg
, offset
, false);
8080 gen_frame_store (rtx reg
, rtx frame_reg
, int offset
)
8082 return gen_frame_set (reg
, frame_reg
, offset
, true);
8086 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame
&frame
)
8088 struct machine_function
*m
= cfun
->machine
;
8089 const unsigned ncregs
= NUM_X86_64_MS_CLOBBERED_REGS
8090 + m
->call_ms2sysv_extra_regs
;
8091 rtvec v
= rtvec_alloc (ncregs
+ 1);
8092 unsigned int align
, i
, vi
= 0;
8095 rtx rax
= gen_rtx_REG (word_mode
, AX_REG
);
8096 const class xlogue_layout
&xlogue
= xlogue_layout::get_instance ();
8098 /* AL should only be live with sysv_abi. */
8099 gcc_assert (!ix86_eax_live_at_start_p ());
8100 gcc_assert (m
->fs
.sp_offset
>= frame
.sse_reg_save_offset
);
8102 /* Setup RAX as the stub's base pointer. We use stack_realign_offset rather
8103 we've actually realigned the stack or not. */
8104 align
= GET_MODE_ALIGNMENT (V4SFmode
);
8105 addr
= choose_baseaddr (frame
.stack_realign_offset
8106 + xlogue
.get_stub_ptr_offset (), &align
, AX_REG
);
8107 gcc_assert (align
>= GET_MODE_ALIGNMENT (V4SFmode
));
8109 emit_insn (gen_rtx_SET (rax
, addr
));
8111 /* Get the stub symbol. */
8112 sym
= xlogue
.get_stub_rtx (frame_pointer_needed
? XLOGUE_STUB_SAVE_HFP
8113 : XLOGUE_STUB_SAVE
);
8114 RTVEC_ELT (v
, vi
++) = gen_rtx_USE (VOIDmode
, sym
);
8116 for (i
= 0; i
< ncregs
; ++i
)
8118 const xlogue_layout::reginfo
&r
= xlogue
.get_reginfo (i
);
8119 rtx reg
= gen_rtx_REG ((SSE_REGNO_P (r
.regno
) ? V4SFmode
: word_mode
),
8121 RTVEC_ELT (v
, vi
++) = gen_frame_store (reg
, rax
, -r
.offset
);
8124 gcc_assert (vi
== (unsigned)GET_NUM_ELEM (v
));
8126 insn
= emit_insn (gen_rtx_PARALLEL (VOIDmode
, v
));
8127 RTX_FRAME_RELATED_P (insn
) = true;
8130 /* Generate and return an insn body to AND X with Y. */
8133 gen_and2_insn (rtx x
, rtx y
)
8135 enum insn_code icode
= optab_handler (and_optab
, GET_MODE (x
));
8137 gcc_assert (insn_operand_matches (icode
, 0, x
));
8138 gcc_assert (insn_operand_matches (icode
, 1, x
));
8139 gcc_assert (insn_operand_matches (icode
, 2, y
));
8141 return GEN_FCN (icode
) (x
, x
, y
);
8144 /* Expand the prologue into a bunch of separate insns. */
8147 ix86_expand_prologue (void)
8149 struct machine_function
*m
= cfun
->machine
;
8151 HOST_WIDE_INT allocate
;
8152 bool int_registers_saved
;
8153 bool sse_registers_saved
;
8154 bool save_stub_call_needed
;
8155 rtx static_chain
= NULL_RTX
;
8157 if (ix86_function_naked (current_function_decl
))
8159 if (flag_stack_usage_info
)
8160 current_function_static_stack_size
= 0;
8164 ix86_finalize_stack_frame_flags ();
8166 /* DRAP should not coexist with stack_realign_fp */
8167 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
8169 memset (&m
->fs
, 0, sizeof (m
->fs
));
8171 /* Initialize CFA state for before the prologue. */
8172 m
->fs
.cfa_reg
= stack_pointer_rtx
;
8173 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
8175 /* Track SP offset to the CFA. We continue tracking this after we've
8176 swapped the CFA register away from SP. In the case of re-alignment
8177 this is fudged; we're interested to offsets within the local frame. */
8178 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
8179 m
->fs
.sp_valid
= true;
8180 m
->fs
.sp_realigned
= false;
8182 const struct ix86_frame
&frame
= cfun
->machine
->frame
;
8184 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
8186 /* We should have already generated an error for any use of
8187 ms_hook on a nested function. */
8188 gcc_checking_assert (!ix86_static_chain_on_stack
);
8190 /* Check if profiling is active and we shall use profiling before
8191 prologue variant. If so sorry. */
8192 if (crtl
->profile
&& flag_fentry
!= 0)
8193 sorry ("%<ms_hook_prologue%> attribute is not compatible "
8194 "with %<-mfentry%> for 32-bit");
8196 /* In ix86_asm_output_function_label we emitted:
8197 8b ff movl.s %edi,%edi
8199 8b ec movl.s %esp,%ebp
8201 This matches the hookable function prologue in Win32 API
8202 functions in Microsoft Windows XP Service Pack 2 and newer.
8203 Wine uses this to enable Windows apps to hook the Win32 API
8204 functions provided by Wine.
8206 What that means is that we've already set up the frame pointer. */
8208 if (frame_pointer_needed
8209 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
8213 /* We've decided to use the frame pointer already set up.
8214 Describe this to the unwinder by pretending that both
8215 push and mov insns happen right here.
8217 Putting the unwind info here at the end of the ms_hook
8218 is done so that we can make absolutely certain we get
8219 the required byte sequence at the start of the function,
8220 rather than relying on an assembler that can produce
8221 the exact encoding required.
8223 However it does mean (in the unpatched case) that we have
8224 a 1 insn window where the asynchronous unwind info is
8225 incorrect. However, if we placed the unwind info at
8226 its correct location we would have incorrect unwind info
8227 in the patched case. Which is probably all moot since
8228 I don't expect Wine generates dwarf2 unwind info for the
8229 system libraries that use this feature. */
8231 insn
= emit_insn (gen_blockage ());
8233 push
= gen_push (hard_frame_pointer_rtx
);
8234 mov
= gen_rtx_SET (hard_frame_pointer_rtx
,
8236 RTX_FRAME_RELATED_P (push
) = 1;
8237 RTX_FRAME_RELATED_P (mov
) = 1;
8239 RTX_FRAME_RELATED_P (insn
) = 1;
8240 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
8241 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
8243 /* Note that gen_push incremented m->fs.cfa_offset, even
8244 though we didn't emit the push insn here. */
8245 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
8246 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
8247 m
->fs
.fp_valid
= true;
8251 /* The frame pointer is not needed so pop %ebp again.
8252 This leaves us with a pristine state. */
8253 emit_insn (gen_pop (hard_frame_pointer_rtx
));
8257 /* The first insn of a function that accepts its static chain on the
8258 stack is to push the register that would be filled in by a direct
8259 call. This insn will be skipped by the trampoline. */
8260 else if (ix86_static_chain_on_stack
)
8262 static_chain
= ix86_static_chain (cfun
->decl
, false);
8263 insn
= emit_insn (gen_push (static_chain
));
8264 emit_insn (gen_blockage ());
8266 /* We don't want to interpret this push insn as a register save,
8267 only as a stack adjustment. The real copy of the register as
8268 a save will be done later, if needed. */
8269 t
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
8270 t
= gen_rtx_SET (stack_pointer_rtx
, t
);
8271 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
8272 RTX_FRAME_RELATED_P (insn
) = 1;
8275 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
8276 of DRAP is needed and stack realignment is really needed after reload */
8277 if (stack_realign_drap
)
8279 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
8281 /* Can't use DRAP in interrupt function. */
8282 if (cfun
->machine
->func_type
!= TYPE_NORMAL
)
8283 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
8284 "in interrupt service routine. This may be worked "
8285 "around by avoiding functions with aggregate return.");
8287 /* Only need to push parameter pointer reg if it is caller saved. */
8288 if (!call_used_or_fixed_reg_p (REGNO (crtl
->drap_reg
)))
8290 /* Push arg pointer reg */
8291 insn
= emit_insn (gen_push (crtl
->drap_reg
));
8292 RTX_FRAME_RELATED_P (insn
) = 1;
8295 /* Grab the argument pointer. */
8296 t
= plus_constant (Pmode
, stack_pointer_rtx
, m
->fs
.sp_offset
);
8297 insn
= emit_insn (gen_rtx_SET (crtl
->drap_reg
, t
));
8298 RTX_FRAME_RELATED_P (insn
) = 1;
8299 m
->fs
.cfa_reg
= crtl
->drap_reg
;
8300 m
->fs
.cfa_offset
= 0;
8302 /* Align the stack. */
8303 insn
= emit_insn (gen_and2_insn (stack_pointer_rtx
,
8304 GEN_INT (-align_bytes
)));
8305 RTX_FRAME_RELATED_P (insn
) = 1;
8307 /* Replicate the return address on the stack so that return
8308 address can be reached via (argp - 1) slot. This is needed
8309 to implement macro RETURN_ADDR_RTX and intrinsic function
8310 expand_builtin_return_addr etc. */
8311 t
= plus_constant (Pmode
, crtl
->drap_reg
, -UNITS_PER_WORD
);
8312 t
= gen_frame_mem (word_mode
, t
);
8313 insn
= emit_insn (gen_push (t
));
8314 RTX_FRAME_RELATED_P (insn
) = 1;
8316 /* For the purposes of frame and register save area addressing,
8317 we've started over with a new frame. */
8318 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
8319 m
->fs
.realigned
= true;
8323 /* Replicate static chain on the stack so that static chain
8324 can be reached via (argp - 2) slot. This is needed for
8325 nested function with stack realignment. */
8326 insn
= emit_insn (gen_push (static_chain
));
8327 RTX_FRAME_RELATED_P (insn
) = 1;
8331 int_registers_saved
= (frame
.nregs
== 0);
8332 sse_registers_saved
= (frame
.nsseregs
== 0);
8333 save_stub_call_needed
= (m
->call_ms2sysv
);
8334 gcc_assert (sse_registers_saved
|| !save_stub_call_needed
);
8336 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
8338 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8339 slower on all targets. Also sdb didn't like it. */
8340 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
8341 RTX_FRAME_RELATED_P (insn
) = 1;
8343 /* Push registers now, before setting the frame pointer
8345 if (!int_registers_saved
8347 && !frame
.save_regs_using_mov
)
8349 ix86_emit_save_regs ();
8350 int_registers_saved
= true;
8351 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
8354 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
8356 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
8357 RTX_FRAME_RELATED_P (insn
) = 1;
8359 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8360 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
8361 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
8362 m
->fs
.fp_valid
= true;
8366 if (!int_registers_saved
)
8368 /* If saving registers via PUSH, do so now. */
8369 if (!frame
.save_regs_using_mov
)
8371 ix86_emit_save_regs ();
8372 int_registers_saved
= true;
8373 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
8376 /* When using red zone we may start register saving before allocating
8377 the stack frame saving one cycle of the prologue. However, avoid
8378 doing this if we have to probe the stack; at least on x86_64 the
8379 stack probe can turn into a call that clobbers a red zone location. */
8380 else if (ix86_using_red_zone ()
8381 && (! TARGET_STACK_PROBE
8382 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
8384 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
8385 int_registers_saved
= true;
8389 if (stack_realign_fp
)
8391 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
8392 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
8394 /* Record last valid frame pointer offset. */
8395 m
->fs
.sp_realigned_fp_last
= frame
.reg_save_offset
;
8397 /* The computation of the size of the re-aligned stack frame means
8398 that we must allocate the size of the register save area before
8399 performing the actual alignment. Otherwise we cannot guarantee
8400 that there's enough storage above the realignment point. */
8401 allocate
= frame
.reg_save_offset
- m
->fs
.sp_offset
8402 + frame
.stack_realign_allocate
;
8404 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8405 GEN_INT (-allocate
), -1, false);
8407 /* Align the stack. */
8408 emit_insn (gen_and2_insn (stack_pointer_rtx
, GEN_INT (-align_bytes
)));
8409 m
->fs
.sp_offset
= ROUND_UP (m
->fs
.sp_offset
, align_bytes
);
8410 m
->fs
.sp_realigned_offset
= m
->fs
.sp_offset
8411 - frame
.stack_realign_allocate
;
8412 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
8413 Beyond this point, stack access should be done via choose_baseaddr or
8414 by using sp_valid_at and fp_valid_at to determine the correct base
8415 register. Henceforth, any CFA offset should be thought of as logical
8416 and not physical. */
8417 gcc_assert (m
->fs
.sp_realigned_offset
>= m
->fs
.sp_realigned_fp_last
);
8418 gcc_assert (m
->fs
.sp_realigned_offset
== frame
.stack_realign_offset
);
8419 m
->fs
.sp_realigned
= true;
8421 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
8422 is needed to describe where a register is saved using a realigned
8423 stack pointer, so we need to invalidate the stack pointer for that
8426 m
->fs
.sp_valid
= false;
8428 /* If SP offset is non-immediate after allocation of the stack frame,
8429 then emit SSE saves or stub call prior to allocating the rest of the
8430 stack frame. This is less efficient for the out-of-line stub because
8431 we can't combine allocations across the call barrier, but it's better
8432 than using a scratch register. */
8433 else if (!x86_64_immediate_operand (GEN_INT (frame
.stack_pointer_offset
8434 - m
->fs
.sp_realigned_offset
),
8437 if (!sse_registers_saved
)
8439 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
8440 sse_registers_saved
= true;
8442 else if (save_stub_call_needed
)
8444 ix86_emit_outlined_ms2sysv_save (frame
);
8445 save_stub_call_needed
= false;
8450 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
8452 if (flag_stack_usage_info
)
8454 /* We start to count from ARG_POINTER. */
8455 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
8457 /* If it was realigned, take into account the fake frame. */
8458 if (stack_realign_drap
)
8460 if (ix86_static_chain_on_stack
)
8461 stack_size
+= UNITS_PER_WORD
;
8463 if (!call_used_or_fixed_reg_p (REGNO (crtl
->drap_reg
)))
8464 stack_size
+= UNITS_PER_WORD
;
8466 /* This over-estimates by 1 minimal-stack-alignment-unit but
8467 mitigates that by counting in the new return address slot. */
8468 current_function_dynamic_stack_size
8469 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
8472 current_function_static_stack_size
= stack_size
;
8475 /* On SEH target with very large frame size, allocate an area to save
8476 SSE registers (as the very large allocation won't be described). */
8478 && frame
.stack_pointer_offset
> SEH_MAX_FRAME_SIZE
8479 && !sse_registers_saved
)
8481 HOST_WIDE_INT sse_size
8482 = frame
.sse_reg_save_offset
- frame
.reg_save_offset
;
8484 gcc_assert (int_registers_saved
);
8486 /* No need to do stack checking as the area will be immediately
8488 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8489 GEN_INT (-sse_size
), -1,
8490 m
->fs
.cfa_reg
== stack_pointer_rtx
);
8491 allocate
-= sse_size
;
8492 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
8493 sse_registers_saved
= true;
8496 /* If stack clash protection is requested, then probe the stack. */
8497 if (allocate
>= 0 && flag_stack_clash_protection
)
8499 ix86_adjust_stack_and_probe (allocate
, int_registers_saved
, false);
8503 /* The stack has already been decremented by the instruction calling us
8504 so probe if the size is non-negative to preserve the protection area. */
8505 else if (allocate
>= 0 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
8507 const HOST_WIDE_INT probe_interval
= get_probe_interval ();
8509 if (STACK_CHECK_MOVING_SP
)
8512 && !cfun
->calls_alloca
8513 && allocate
<= probe_interval
)
8518 ix86_adjust_stack_and_probe (allocate
, int_registers_saved
, true);
8525 HOST_WIDE_INT size
= allocate
;
8527 if (TARGET_64BIT
&& size
>= HOST_WIDE_INT_C (0x80000000))
8528 size
= 0x80000000 - get_stack_check_protect () - 1;
8530 if (TARGET_STACK_PROBE
)
8532 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
8534 if (size
> probe_interval
)
8535 ix86_emit_probe_stack_range (0, size
, int_registers_saved
);
8538 ix86_emit_probe_stack_range (0,
8539 size
+ get_stack_check_protect (),
8540 int_registers_saved
);
8544 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
8546 if (size
> probe_interval
8547 && size
> get_stack_check_protect ())
8548 ix86_emit_probe_stack_range (get_stack_check_protect (),
8550 - get_stack_check_protect ()),
8551 int_registers_saved
);
8554 ix86_emit_probe_stack_range (get_stack_check_protect (), size
,
8555 int_registers_saved
);
8562 else if (!ix86_target_stack_probe ()
8563 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
8565 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8566 GEN_INT (-allocate
), -1,
8567 m
->fs
.cfa_reg
== stack_pointer_rtx
);
8571 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
8573 const bool sp_is_cfa_reg
= (m
->fs
.cfa_reg
== stack_pointer_rtx
);
8574 bool eax_live
= ix86_eax_live_at_start_p ();
8575 bool r10_live
= false;
8578 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
8582 insn
= emit_insn (gen_push (eax
));
8583 allocate
-= UNITS_PER_WORD
;
8584 /* Note that SEH directives need to continue tracking the stack
8585 pointer even after the frame pointer has been set up. */
8586 if (sp_is_cfa_reg
|| TARGET_SEH
)
8589 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
8590 RTX_FRAME_RELATED_P (insn
) = 1;
8591 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
8592 gen_rtx_SET (stack_pointer_rtx
,
8593 plus_constant (Pmode
,
8601 r10
= gen_rtx_REG (Pmode
, R10_REG
);
8602 insn
= emit_insn (gen_push (r10
));
8603 allocate
-= UNITS_PER_WORD
;
8604 if (sp_is_cfa_reg
|| TARGET_SEH
)
8607 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
8608 RTX_FRAME_RELATED_P (insn
) = 1;
8609 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
8610 gen_rtx_SET (stack_pointer_rtx
,
8611 plus_constant (Pmode
,
8617 emit_move_insn (eax
, GEN_INT (allocate
));
8618 emit_insn (gen_allocate_stack_worker_probe (Pmode
, eax
, eax
));
8620 /* Use the fact that AX still contains ALLOCATE. */
8621 insn
= emit_insn (gen_pro_epilogue_adjust_stack_sub
8622 (Pmode
, stack_pointer_rtx
, stack_pointer_rtx
, eax
));
8624 if (sp_is_cfa_reg
|| TARGET_SEH
)
8627 m
->fs
.cfa_offset
+= allocate
;
8628 RTX_FRAME_RELATED_P (insn
) = 1;
8629 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
8630 gen_rtx_SET (stack_pointer_rtx
,
8631 plus_constant (Pmode
, stack_pointer_rtx
,
8634 m
->fs
.sp_offset
+= allocate
;
8636 /* Use stack_pointer_rtx for relative addressing so that code works for
8637 realigned stack. But this means that we need a blockage to prevent
8638 stores based on the frame pointer from being scheduled before. */
8639 if (r10_live
&& eax_live
)
8641 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, eax
);
8642 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
8643 gen_frame_mem (word_mode
, t
));
8644 t
= plus_constant (Pmode
, t
, UNITS_PER_WORD
);
8645 emit_move_insn (gen_rtx_REG (word_mode
, AX_REG
),
8646 gen_frame_mem (word_mode
, t
));
8647 emit_insn (gen_memory_blockage ());
8649 else if (eax_live
|| r10_live
)
8651 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, eax
);
8652 emit_move_insn (gen_rtx_REG (word_mode
,
8653 (eax_live
? AX_REG
: R10_REG
)),
8654 gen_frame_mem (word_mode
, t
));
8655 emit_insn (gen_memory_blockage ());
8658 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
8660 /* If we haven't already set up the frame pointer, do so now. */
8661 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
8663 insn
= gen_add3_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
,
8664 GEN_INT (frame
.stack_pointer_offset
8665 - frame
.hard_frame_pointer_offset
));
8666 insn
= emit_insn (insn
);
8667 RTX_FRAME_RELATED_P (insn
) = 1;
8668 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
8670 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8671 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
8672 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
8673 m
->fs
.fp_valid
= true;
8676 if (!int_registers_saved
)
8677 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
8678 if (!sse_registers_saved
)
8679 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
8680 else if (save_stub_call_needed
)
8681 ix86_emit_outlined_ms2sysv_save (frame
);
8683 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
8685 if (!TARGET_64BIT
&& pic_offset_table_rtx
&& crtl
->profile
&& !flag_fentry
)
8687 rtx pic
= gen_rtx_REG (Pmode
, REAL_PIC_OFFSET_TABLE_REGNUM
);
8688 insn
= emit_insn (gen_set_got (pic
));
8689 RTX_FRAME_RELATED_P (insn
) = 1;
8690 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
8691 emit_insn (gen_prologue_use (pic
));
8692 /* Delete an already emitted SET_GOT if it exists and is allocated to
8693 REAL_PIC_OFFSET_TABLE_REGNUM. */
8694 ix86_elim_entry_set_got (pic
);
8697 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
8699 /* vDRAP is setup but after reload it turns out stack realign
8700 isn't necessary, here we will emit prologue to setup DRAP
8701 without stack realign adjustment */
8702 t
= choose_baseaddr (0, NULL
);
8703 emit_insn (gen_rtx_SET (crtl
->drap_reg
, t
));
8706 /* Prevent instructions from being scheduled into register save push
8707 sequence when access to the redzone area is done through frame pointer.
8708 The offset between the frame pointer and the stack pointer is calculated
8709 relative to the value of the stack pointer at the end of the function
8710 prologue, and moving instructions that access redzone area via frame
8711 pointer inside push sequence violates this assumption. */
8712 if (frame_pointer_needed
&& frame
.red_zone_size
)
8713 emit_insn (gen_memory_blockage ());
8715 /* SEH requires that the prologue end within 256 bytes of the start of
8716 the function. Prevent instruction schedules that would extend that.
8717 Further, prevent alloca modifications to the stack pointer from being
8718 combined with prologue modifications. */
8720 emit_insn (gen_prologue_use (stack_pointer_rtx
));
8723 /* Emit code to restore REG using a POP insn. */
/* Besides emitting the pop, this keeps cfun->machine->fs (the epilogue's
   CFA / stack-offset tracking state) and the attached CFI reg-notes in
   sync with the stack-pointer change the pop causes.  */
8726 ix86_emit_restore_reg_using_pop (rtx reg
)
8728 struct machine_function
*m
= cfun
->machine
;
8729 rtx_insn
*insn
= emit_insn (gen_pop (reg
));
/* Queue a restore note for REG at the current stack offset, then account
   for the word the pop removes from the stack.  */
8731 ix86_add_cfa_restore_note (insn
, reg
, m
->fs
.sp_offset
);
8732 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
/* Case 1: we just popped the DRAP register itself while it is the CFA:
   redefine the CFA to be the (now reloaded) DRAP register.  */
8734 if (m
->fs
.cfa_reg
== crtl
->drap_reg
8735 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
8737 /* Previously we'd represented the CFA as an expression
8738 like *(%ebp - 8). We've just popped that value from
8739 the stack, which means we need to reset the CFA to
8740 the drap register. This will remain until we restore
8741 the stack pointer. */
8742 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
8743 RTX_FRAME_RELATED_P (insn
) = 1;
8745 /* This means that the DRAP register is valid for addressing too. */
8746 m
->fs
.drap_valid
= true;
/* Case 2: the CFA follows the stack pointer, so the pop moves the CFA up
   by one word and needs an explicit adjust note.  */
8750 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8752 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
8753 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
8754 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
8755 RTX_FRAME_RELATED_P (insn
) = 1;
8757 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
8760 /* When the frame pointer is the CFA, and we pop it, we are
8761 swapping back to the stack pointer as the CFA. This happens
8762 for stack frames that don't allocate other data, so we assume
8763 the stack pointer is now pointing at the return address, i.e.
8764 the function entry state, which makes the offset be 1 word. */
8765 if (reg
== hard_frame_pointer_rtx
)
8767 m
->fs
.fp_valid
= false;
8768 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
8770 m
->fs
.cfa_reg
= stack_pointer_rtx
;
8771 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
8773 add_reg_note (insn
, REG_CFA_DEF_CFA
,
8774 plus_constant (Pmode
, stack_pointer_rtx
,
8776 RTX_FRAME_RELATED_P (insn
) = 1;
8781 /* Emit code to restore saved registers using POP insns. */
/* Walks the hard registers in increasing regno order and pops back every
   general register that ix86_save_reg reports as saved by the prologue.
   The per-register CFI bookkeeping is done by
   ix86_emit_restore_reg_using_pop.  */
8784 ix86_emit_restore_regs_using_pop (void)
8788 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8789 if (GENERAL_REGNO_P (regno
) && ix86_save_reg (regno
, false, true))
8790 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode
, regno
));
8793 /* Emit code and notes for the LEAVE instruction. If insn is non-null,
8794 omits the emit and only attaches the notes. */
/* After LEAVE the stack pointer is valid again and sits one word below
   the frame-pointer save slot (sp_offset = fp_offset - UNITS_PER_WORD),
   while the frame pointer itself is no longer valid; the fs updates
   below record exactly that.  If the frame pointer was the CFA, the CFA
   is switched back to the stack pointer with a REG_CFA_DEF_CFA note.  */
8797 ix86_emit_leave (rtx_insn
*insn
)
8799 struct machine_function
*m
= cfun
->machine
;
8802 insn
= emit_insn (gen_leave (word_mode
));
8804 ix86_add_queued_cfa_restore_notes (insn
);
8806 gcc_assert (m
->fs
.fp_valid
);
8807 m
->fs
.sp_valid
= true;
8808 m
->fs
.sp_realigned
= false;
8809 m
->fs
.sp_offset
= m
->fs
.fp_offset
- UNITS_PER_WORD
;
8810 m
->fs
.fp_valid
= false;
8812 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
8814 m
->fs
.cfa_reg
= stack_pointer_rtx
;
8815 m
->fs
.cfa_offset
= m
->fs
.sp_offset
;
8817 add_reg_note (insn
, REG_CFA_DEF_CFA
,
8818 plus_constant (Pmode
, stack_pointer_rtx
,
8820 RTX_FRAME_RELATED_P (insn
) = 1;
8822 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
,
8826 /* Emit code to restore saved registers using MOV insns.
8827 First register is restored from CFA - CFA_OFFSET. */
/* MAYBE_EH_RETURN is forwarded to ix86_save_reg so that the eh_return
   scratch registers are (or are not) included in the restore set.
   Each register is loaded from a frame slot addressed via
   choose_baseaddr, and a CFA restore note is queued for it;
   CFA_OFFSET steps down one word per restored register.  */
8829 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
8830 bool maybe_eh_return
)
8832 struct machine_function
*m
= cfun
->machine
;
8835 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8836 if (GENERAL_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
, true))
8838 rtx reg
= gen_rtx_REG (word_mode
, regno
);
8842 mem
= choose_baseaddr (cfa_offset
, NULL
);
8843 mem
= gen_frame_mem (word_mode
, mem
);
8844 insn
= emit_move_insn (reg
, mem
);
/* Reloading the DRAP register while it is the CFA: redefine the CFA to
   the register itself (mirrors the pop-based restore path).  */
8846 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
8848 /* Previously we'd represented the CFA as an expression
8849 like *(%ebp - 8). We've just popped that value from
8850 the stack, which means we need to reset the CFA to
8851 the drap register. This will remain until we restore
8852 the stack pointer. */
8853 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
8854 RTX_FRAME_RELATED_P (insn
) = 1;
8856 /* This means that the DRAP register is valid for addressing. */
8857 m
->fs
.drap_valid
= true;
8860 ix86_add_cfa_restore_note (NULL
, reg
, cfa_offset
);
8862 cfa_offset
-= UNITS_PER_WORD
;
8866 /* Emit code to restore saved registers using MOV insns.
8867 First register is restored from CFA - CFA_OFFSET. */
/* SSE counterpart of ix86_emit_restore_regs_using_mov: each saved SSE
   register is reloaded in V4SFmode, the access alignment is negotiated
   with choose_baseaddr (it can lower ALIGN depending on the base
   register chosen), and CFA_OFFSET steps down by the V4SF mode size.  */
8869 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
8870 bool maybe_eh_return
)
8874 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8875 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
, true))
8877 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
8879 unsigned int align
= GET_MODE_ALIGNMENT (V4SFmode
);
8881 mem
= choose_baseaddr (cfa_offset
, &align
);
8882 mem
= gen_rtx_MEM (V4SFmode
, mem
);
8884 /* The location alignment depends upon the base register. */
8885 align
= MIN (GET_MODE_ALIGNMENT (V4SFmode
), align
);
/* The slot offset must be a multiple of the access alignment.  */
8886 gcc_assert (! (cfa_offset
& (align
/ BITS_PER_UNIT
- 1)));
8887 set_mem_align (mem
, align
);
8888 emit_insn (gen_rtx_SET (reg
, mem
));
8890 ix86_add_cfa_restore_note (NULL
, reg
, cfa_offset
);
8892 cfa_offset
-= GET_MODE_SIZE (V4SFmode
);
/* Emit the out-of-line ms2sysv epilogue restore sequence: point RSI at
   the xlogue save area, pick the matching XLOGUE_STUB_RESTORE* stub,
   build a PARALLEL describing the stub's register frame-loads (plus the
   return / SP-restore elements), and emit it either as a call
   (USE_CALL) or as the function's tail-call jump.  STYLE is forwarded
   to pro_epilogue_adjust_stack for the final stack adjustment;
   presumably it has the same meaning as in ix86_expand_epilogue --
   TODO(review): confirm.  */
8897 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame
&frame
,
8898 bool use_call
, int style
)
8900 struct machine_function
*m
= cfun
->machine
;
8901 const unsigned ncregs
= NUM_X86_64_MS_CLOBBERED_REGS
8902 + m
->call_ms2sysv_extra_regs
;
8904 unsigned int elems_needed
, align
, i
, vi
= 0;
8907 rtx rsi
= gen_rtx_REG (word_mode
, SI_REG
);
8909 const class xlogue_layout
&xlogue
= xlogue_layout::get_instance ();
8910 HOST_WIDE_INT stub_ptr_offset
= xlogue
.get_stub_ptr_offset ();
8911 HOST_WIDE_INT rsi_offset
= frame
.stack_realign_offset
+ stub_ptr_offset
;
8912 rtx rsi_frame_load
= NULL_RTX
;
8913 HOST_WIDE_INT rsi_restore_offset
= (HOST_WIDE_INT
)-1;
8914 enum xlogue_stub stub
;
8916 gcc_assert (!m
->fs
.fp_valid
|| frame_pointer_needed
);
8918 /* If using a realigned stack, we should never start with padding. */
8919 gcc_assert (!stack_realign_fp
|| !xlogue
.get_stack_align_off_in ());
8921 /* Setup RSI as the stub's base pointer. */
8922 align
= GET_MODE_ALIGNMENT (V4SFmode
);
8923 tmp
= choose_baseaddr (rsi_offset
, &align
, SI_REG
);
/* The stub does aligned V4SF accesses, so the base must be at least
   V4SF-aligned.  */
8924 gcc_assert (align
>= GET_MODE_ALIGNMENT (V4SFmode
));
8926 emit_insn (gen_rtx_SET (rsi
, tmp
));
8928 /* Get a symbol for the stub. */
8929 if (frame_pointer_needed
)
8930 stub
= use_call
? XLOGUE_STUB_RESTORE_HFP
8931 : XLOGUE_STUB_RESTORE_HFP_TAIL
;
8933 stub
= use_call
? XLOGUE_STUB_RESTORE
8934 : XLOGUE_STUB_RESTORE_TAIL
;
8935 sym
= xlogue
.get_stub_rtx (stub
);
/* Size the PARALLEL: one element per restored register, plus the extra
   return/SP/frame-pointer elements for the tail-call forms.  */
8937 elems_needed
= ncregs
;
8941 elems_needed
+= frame_pointer_needed
? 5 : 3;
8942 v
= rtvec_alloc (elems_needed
);
8944 /* We call the epilogue stub when we need to pop incoming args or we are
8945 doing a sibling call as the tail. Otherwise, we will emit a jmp to the
8946 epilogue stub and it is the tail-call. */
8948 RTVEC_ELT (v
, vi
++) = gen_rtx_USE (VOIDmode
, sym
);
8951 RTVEC_ELT (v
, vi
++) = ret_rtx
;
8952 RTVEC_ELT (v
, vi
++) = gen_rtx_USE (VOIDmode
, sym
);
8953 if (frame_pointer_needed
)
8955 rtx rbp
= gen_rtx_REG (DImode
, BP_REG
);
8956 gcc_assert (m
->fs
.fp_valid
);
8957 gcc_assert (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
);
/* Tail-call with HFP: the stub restores SP from RBP+8 and reloads RBP
   from its save slot; the BLKmode scratch clobber keeps memory accesses
   from being moved across the stub.  */
8959 tmp
= plus_constant (DImode
, rbp
, 8);
8960 RTVEC_ELT (v
, vi
++) = gen_rtx_SET (stack_pointer_rtx
, tmp
);
8961 RTVEC_ELT (v
, vi
++) = gen_rtx_SET (rbp
, gen_rtx_MEM (DImode
, rbp
));
8962 tmp
= gen_rtx_MEM (BLKmode
, gen_rtx_SCRATCH (VOIDmode
));
8963 RTVEC_ELT (v
, vi
++) = gen_rtx_CLOBBER (VOIDmode
, tmp
);
8967 /* If no hard frame pointer, we set R10 to the SP restore value. */
8968 gcc_assert (!m
->fs
.fp_valid
);
8969 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
8970 gcc_assert (m
->fs
.sp_valid
);
8972 r10
= gen_rtx_REG (DImode
, R10_REG
);
8973 tmp
= plus_constant (Pmode
, rsi
, stub_ptr_offset
);
8974 emit_insn (gen_rtx_SET (r10
, tmp
));
8976 RTVEC_ELT (v
, vi
++) = gen_rtx_SET (stack_pointer_rtx
, r10
);
8980 /* Generate frame load insns and restore notes. */
8981 for (i
= 0; i
< ncregs
; ++i
)
8983 const xlogue_layout::reginfo
&r
= xlogue
.get_reginfo (i
);
8984 machine_mode mode
= SSE_REGNO_P (r
.regno
) ? V4SFmode
: word_mode
;
8985 rtx reg
, frame_load
;
8987 reg
= gen_rtx_REG (mode
, r
.regno
);
8988 frame_load
= gen_frame_load (reg
, rsi
, r
.offset
);
8990 /* Save RSI frame load insn & note to add last. */
8991 if (r
.regno
== SI_REG
)
8993 gcc_assert (!rsi_frame_load
);
8994 rsi_frame_load
= frame_load
;
8995 rsi_restore_offset
= r
.offset
;
8999 RTVEC_ELT (v
, vi
++) = frame_load
;
9000 ix86_add_cfa_restore_note (NULL
, reg
, r
.offset
);
9004 /* Add RSI frame load & restore note at the end. */
/* RSI is the stub's base pointer, so its own reload must come after all
   other frame loads that address through it.  */
9005 gcc_assert (rsi_frame_load
);
9006 gcc_assert (rsi_restore_offset
!= (HOST_WIDE_INT
)-1);
9007 RTVEC_ELT (v
, vi
++) = rsi_frame_load
;
9008 ix86_add_cfa_restore_note (NULL
, gen_rtx_REG (DImode
, SI_REG
),
9009 rsi_restore_offset
);
9011 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
9012 if (!use_call
&& !frame_pointer_needed
)
9014 gcc_assert (m
->fs
.sp_valid
);
9015 gcc_assert (!m
->fs
.sp_realigned
);
9017 /* At this point, R10 should point to frame.stack_realign_offset. */
9018 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
9019 m
->fs
.cfa_offset
+= m
->fs
.sp_offset
- frame
.stack_realign_offset
;
9020 m
->fs
.sp_offset
= frame
.stack_realign_offset
;
/* Every slot of the PARALLEL must have been filled.  */
9023 gcc_assert (vi
== (unsigned int)GET_NUM_ELEM (v
));
9024 tmp
= gen_rtx_PARALLEL (VOIDmode
, v
);
9026 insn
= emit_insn (tmp
);
9029 insn
= emit_jump_insn (tmp
);
9030 JUMP_LABEL (insn
) = ret_rtx
;
9032 if (frame_pointer_needed
)
9033 ix86_emit_leave (insn
);
9036 /* Need CFA adjust note. */
9037 tmp
= gen_rtx_SET (stack_pointer_rtx
, r10
);
9038 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, tmp
);
9042 RTX_FRAME_RELATED_P (insn
) = true;
9043 ix86_add_queued_cfa_restore_notes (insn
);
9045 /* If we're not doing a tail-call, we need to adjust the stack. */
9046 if (use_call
&& m
->fs
.sp_valid
)
9048 HOST_WIDE_INT dealloc
= m
->fs
.sp_offset
- frame
.stack_realign_offset
;
9049 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
9050 GEN_INT (dealloc
), style
,
9051 m
->fs
.cfa_reg
== stack_pointer_rtx
);
9055 /* Restore function stack, frame, and registers. */
/* NOTE(review): STYLE == 2 is the eh_return path (see the eh_return
   handling below); STYLE != 1 marks a non-normal epilogue and the
   "Sibcall epilogues" comment suggests one value is the sibcall case --
   confirm the exact encoding against the expanders in i386.md.
   On exit (and on the sibcall path) cfun->machine->fs is reset to the
   state saved on entry so the next epilogue expansion starts clean.  */
9058 ix86_expand_epilogue (int style
)
9060 struct machine_function
*m
= cfun
->machine
;
9061 struct machine_frame_state frame_state_save
= m
->fs
;
9062 bool restore_regs_via_mov
;
9064 bool restore_stub_is_tail
= false;
9066 if (ix86_function_naked (current_function_decl
))
9068 /* The program should not reach this point. */
9069 emit_insn (gen_ud2 ());
9073 ix86_finalize_stack_frame_flags ();
9074 const struct ix86_frame
&frame
= cfun
->machine
->frame
;
/* Re-derive the validity of the stack pointer for this epilogue and
   sanity-check the tracked offsets against the computed frame.  */
9076 m
->fs
.sp_realigned
= stack_realign_fp
;
9077 m
->fs
.sp_valid
= stack_realign_fp
9078 || !frame_pointer_needed
9079 || crtl
->sp_is_unchanging
;
9080 gcc_assert (!m
->fs
.sp_valid
9081 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
9083 /* The FP must be valid if the frame pointer is present. */
9084 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
9085 gcc_assert (!m
->fs
.fp_valid
9086 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
9088 /* We must have *some* valid pointer to the stack frame. */
9089 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
9091 /* The DRAP is never valid at this point. */
9092 gcc_assert (!m
->fs
.drap_valid
);
9094 /* See the comment about red zone and frame
9095 pointer usage in ix86_expand_prologue. */
9096 if (frame_pointer_needed
&& frame
.red_zone_size
)
9097 emit_insn (gen_memory_blockage ());
9099 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
9100 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
9102 /* Determine the CFA offset of the end of the red-zone. */
9103 m
->fs
.red_zone_offset
= 0;
9104 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
9106 /* The red-zone begins below return address and error code in
9107 exception handler. */
9108 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ INCOMING_FRAME_SP_OFFSET
;
9110 /* When the register save area is in the aligned portion of
9111 the stack, determine the maximum runtime displacement that
9112 matches up with the aligned frame. */
9113 if (stack_realign_drap
)
9114 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
9118 HOST_WIDE_INT reg_save_offset
= frame
.reg_save_offset
;
9120 /* Special care must be taken for the normal return case of a function
9121 using eh_return: the eax and edx registers are marked as saved, but
9122 not restored along this path. Adjust the save location to match. */
9123 if (crtl
->calls_eh_return
&& style
!= 2)
9124 reg_save_offset
-= 2 * UNITS_PER_WORD
;
/* Decide whether the saved general registers come back via MOV loads or
   via POP insns; each arm below documents its own constraint.  */
9126 /* EH_RETURN requires the use of moves to function properly. */
9127 if (crtl
->calls_eh_return
)
9128 restore_regs_via_mov
= true;
9129 /* SEH requires the use of pops to identify the epilogue. */
9130 else if (TARGET_SEH
)
9131 restore_regs_via_mov
= false;
9132 /* If we're only restoring one register and sp cannot be used then
9133 using a move instruction to restore the register since it's
9134 less work than reloading sp and popping the register. */
9135 else if (!sp_valid_at (frame
.hfp_save_offset
) && frame
.nregs
<= 1)
9136 restore_regs_via_mov
= true;
9137 else if (TARGET_EPILOGUE_USING_MOVE
9138 && cfun
->machine
->use_fast_prologue_epilogue
9140 || m
->fs
.sp_offset
!= reg_save_offset
))
9141 restore_regs_via_mov
= true;
9142 else if (frame_pointer_needed
9144 && m
->fs
.sp_offset
!= reg_save_offset
)
9145 restore_regs_via_mov
= true;
9146 else if (frame_pointer_needed
9148 && cfun
->machine
->use_fast_prologue_epilogue
9149 && frame
.nregs
== 1)
9150 restore_regs_via_mov
= true;
9152 restore_regs_via_mov
= false;
9154 if (restore_regs_via_mov
|| frame
.nsseregs
)
9156 /* Ensure that the entire register save area is addressable via
9157 the stack pointer, if we will restore SSE regs via sp. */
9159 && m
->fs
.sp_offset
> 0x7fffffff
9160 && sp_valid_at (frame
.stack_realign_offset
+ 1)
9161 && (frame
.nsseregs
+ frame
.nregs
) != 0)
9163 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
9164 GEN_INT (m
->fs
.sp_offset
9165 - frame
.sse_reg_save_offset
),
9167 m
->fs
.cfa_reg
== stack_pointer_rtx
);
9171 /* If there are any SSE registers to restore, then we have to do it
9172 via moves, since there's obviously no pop for SSE regs. */
9174 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
9177 if (m
->call_ms2sysv
)
9179 int pop_incoming_args
= crtl
->args
.pops_args
&& crtl
->args
.size
;
9181 /* We cannot use a tail-call for the stub if:
9182 1. We have to pop incoming args,
9183 2. We have additional int regs to restore, or
9184 3. A sibling call will be the tail-call, or
9185 4. We are emitting an eh_return_internal epilogue.
9187 TODO: Item 4 has not yet tested!
9189 If any of the above are true, we will call the stub rather than
9191 restore_stub_is_tail
= !(pop_incoming_args
|| frame
.nregs
|| style
!= 1);
9192 ix86_emit_outlined_ms2sysv_restore (frame
, !restore_stub_is_tail
, style
);
9195 /* If using out-of-line stub that is a tail-call, then...*/
9196 if (m
->call_ms2sysv
&& restore_stub_is_tail
)
9198 /* TODO: paranoid tests. (remove eventually) */
9199 gcc_assert (m
->fs
.sp_valid
);
9200 gcc_assert (!m
->fs
.sp_realigned
);
9201 gcc_assert (!m
->fs
.fp_valid
);
9202 gcc_assert (!m
->fs
.realigned
);
9203 gcc_assert (m
->fs
.sp_offset
== UNITS_PER_WORD
);
9204 gcc_assert (!crtl
->drap_reg
);
9205 gcc_assert (!frame
.nregs
);
9207 else if (restore_regs_via_mov
)
9212 ix86_emit_restore_regs_using_mov (reg_save_offset
, style
== 2);
9214 /* eh_return epilogues need %ecx added to the stack pointer. */
9217 rtx sa
= EH_RETURN_STACKADJ_RTX
;
9220 /* %ecx can't be used for both DRAP register and eh_return. */
9222 gcc_assert (REGNO (crtl
->drap_reg
) != CX_REG
);
9224 /* regparm nested functions don't work with eh_return. */
9225 gcc_assert (!ix86_static_chain_on_stack
);
9227 if (frame_pointer_needed
)
9229 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
9230 t
= plus_constant (Pmode
, t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
9231 emit_insn (gen_rtx_SET (sa
, t
));
9233 /* NB: eh_return epilogues must restore the frame pointer
9234 in word_mode since the upper 32 bits of RBP register
9235 can have any values. */
9236 t
= gen_frame_mem (word_mode
, hard_frame_pointer_rtx
);
9237 rtx frame_reg
= gen_rtx_REG (word_mode
,
9238 HARD_FRAME_POINTER_REGNUM
);
9239 insn
= emit_move_insn (frame_reg
, t
);
9241 /* Note that we use SA as a temporary CFA, as the return
9242 address is at the proper place relative to it. We
9243 pretend this happens at the FP restore insn because
9244 prior to this insn the FP would be stored at the wrong
9245 offset relative to SA, and after this insn we have no
9246 other reasonable register to use for the CFA. We don't
9247 bother resetting the CFA to the SP for the duration of
9248 the return insn, unless the control flow instrumentation
9249 is done. In this case the SP is used later and we have
9250 to reset CFA to SP. */
9251 add_reg_note (insn
, REG_CFA_DEF_CFA
,
9252 plus_constant (Pmode
, sa
, UNITS_PER_WORD
));
9253 ix86_add_queued_cfa_restore_notes (insn
);
9254 add_reg_note (insn
, REG_CFA_RESTORE
, frame_reg
);
9255 RTX_FRAME_RELATED_P (insn
) = 1;
9258 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
9259 m
->fs
.fp_valid
= false;
9261 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
9263 flag_cf_protection
);
9267 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
9268 t
= plus_constant (Pmode
, t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
9269 insn
= emit_insn (gen_rtx_SET (stack_pointer_rtx
, t
));
9270 ix86_add_queued_cfa_restore_notes (insn
);
9272 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
9273 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
9275 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
9276 add_reg_note (insn
, REG_CFA_DEF_CFA
,
9277 plus_constant (Pmode
, stack_pointer_rtx
,
9279 RTX_FRAME_RELATED_P (insn
) = 1;
9282 m
->fs
.sp_offset
= UNITS_PER_WORD
;
9283 m
->fs
.sp_valid
= true;
9284 m
->fs
.sp_realigned
= false;
9289 /* SEH requires that the function end with (1) a stack adjustment
9290 if necessary, (2) a sequence of pops, and (3) a return or
9291 jump instruction. Prevent insns from the function body from
9292 being scheduled into this sequence. */
9295 /* Prevent a catch region from being adjacent to the standard
9296 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
9297 nor several other flags that would be interesting to test are
9299 if (flag_non_call_exceptions
)
9300 emit_insn (gen_nops (const1_rtx
));
9302 emit_insn (gen_blockage ());
9305 /* First step is to deallocate the stack frame so that we can
9306 pop the registers. If the stack pointer was realigned, it needs
9307 to be restored now. Also do it on SEH target for very large
9308 frame as the emitted instructions aren't allowed by the ABI
9310 if (!m
->fs
.sp_valid
|| m
->fs
.sp_realigned
9312 && (m
->fs
.sp_offset
- reg_save_offset
9313 >= SEH_MAX_FRAME_SIZE
)))
9315 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
9316 GEN_INT (m
->fs
.fp_offset
9320 else if (m
->fs
.sp_offset
!= reg_save_offset
)
9322 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
9323 GEN_INT (m
->fs
.sp_offset
9326 m
->fs
.cfa_reg
== stack_pointer_rtx
);
/* Registers that were not restored via MOV above come back via POPs.  */
9329 ix86_emit_restore_regs_using_pop ();
9332 /* If we used a stack pointer and haven't already got rid of it,
9336 /* If the stack pointer is valid and pointing at the frame
9337 pointer store address, then we only need a pop. */
9338 if (sp_valid_at (frame
.hfp_save_offset
)
9339 && m
->fs
.sp_offset
== frame
.hfp_save_offset
)
9340 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
9341 /* Leave results in shorter dependency chains on CPUs that are
9342 able to grok it fast. */
9343 else if (TARGET_USE_LEAVE
9344 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun
))
9345 || !cfun
->machine
->use_fast_prologue_epilogue
)
9346 ix86_emit_leave (NULL
);
9349 pro_epilogue_adjust_stack (stack_pointer_rtx
,
9350 hard_frame_pointer_rtx
,
9351 const0_rtx
, style
, !using_drap
);
9352 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
/* DRAP teardown: compute where the original stack pointer lives relative
   to the DRAP register and switch the CFA back to the stack pointer.  */
9358 int param_ptr_offset
= UNITS_PER_WORD
;
9361 gcc_assert (stack_realign_drap
);
9363 if (ix86_static_chain_on_stack
)
9364 param_ptr_offset
+= UNITS_PER_WORD
;
9365 if (!call_used_or_fixed_reg_p (REGNO (crtl
->drap_reg
)))
9366 param_ptr_offset
+= UNITS_PER_WORD
;
9368 insn
= emit_insn (gen_rtx_SET
9370 plus_constant (Pmode
, crtl
->drap_reg
,
9371 -param_ptr_offset
)));
9372 m
->fs
.cfa_reg
= stack_pointer_rtx
;
9373 m
->fs
.cfa_offset
= param_ptr_offset
;
9374 m
->fs
.sp_offset
= param_ptr_offset
;
9375 m
->fs
.realigned
= false;
9377 add_reg_note (insn
, REG_CFA_DEF_CFA
,
9378 plus_constant (Pmode
, stack_pointer_rtx
,
9380 RTX_FRAME_RELATED_P (insn
) = 1;
9382 if (!call_used_or_fixed_reg_p (REGNO (crtl
->drap_reg
)))
9383 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
9386 /* At this point the stack pointer must be valid, and we must have
9387 restored all of the registers. We may not have deallocated the
9388 entire stack frame. We've delayed this until now because it may
9389 be possible to merge the local stack deallocation with the
9390 deallocation forced by ix86_static_chain_on_stack. */
9391 gcc_assert (m
->fs
.sp_valid
);
9392 gcc_assert (!m
->fs
.sp_realigned
);
9393 gcc_assert (!m
->fs
.fp_valid
);
9394 gcc_assert (!m
->fs
.realigned
);
9395 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
9397 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
9398 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
9402 ix86_add_queued_cfa_restore_notes (get_last_insn ());
9404 /* Sibcall epilogues don't want a return instruction. */
9407 m
->fs
= frame_state_save
;
9411 if (cfun
->machine
->func_type
!= TYPE_NORMAL
)
9412 emit_jump_insn (gen_interrupt_return ());
9413 else if (crtl
->args
.pops_args
&& crtl
->args
.size
)
9415 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
9417 /* i386 can only pop 64K bytes. If asked to pop more, pop return
9418 address, do explicit add, and jump indirectly to the caller. */
9420 if (crtl
->args
.pops_args
>= 65536)
9422 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
9425 /* There is no "pascal" calling convention in any 64bit ABI. */
9426 gcc_assert (!TARGET_64BIT
);
9428 insn
= emit_insn (gen_pop (ecx
));
9429 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
9430 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
9432 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
9433 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
9434 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
/* The popped return address now lives in %ecx.  */
9435 add_reg_note (insn
, REG_CFA_REGISTER
, gen_rtx_SET (ecx
, pc_rtx
));
9436 RTX_FRAME_RELATED_P (insn
) = 1;
9438 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
9440 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
9443 emit_jump_insn (gen_simple_return_pop_internal (popc
));
9445 else if (!m
->call_ms2sysv
|| !restore_stub_is_tail
)
9447 /* In case of return from EH a simple return cannot be used
9448 as a return address will be compared with a shadow stack
9449 return address. Use indirect jump instead. */
9450 if (style
== 2 && flag_cf_protection
)
9452 /* Register used in indirect jump must be in word_mode. But
9453 Pmode may not be the same as word_mode for x32. */
9454 rtx ecx
= gen_rtx_REG (word_mode
, CX_REG
);
9457 insn
= emit_insn (gen_pop (ecx
));
9458 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
9459 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
9461 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
9462 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
9463 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
9464 add_reg_note (insn
, REG_CFA_REGISTER
, gen_rtx_SET (ecx
, pc_rtx
));
9465 RTX_FRAME_RELATED_P (insn
) = 1;
9467 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
9470 emit_jump_insn (gen_simple_return_internal ());
9473 /* Restore the state back to the state from the prologue,
9474 so that it's correct for the next epilogue. */
9475 m
->fs
= frame_state_save
;
9478 /* Reset from the function's potential modifications. */
/* Runs while the function's assembly is being output: puts the hard PIC
   register number back (it may have been renumbered while compiling the
   function), then handles the trailing-label / empty-body cases by
   emitting a final "nop" or "ud2".  NOTE(review): the extraction lost
   some lines here; per the surviving comments the label/empty-body logic
   is the Mach-O path and is presumably wrapped in a target conditional
   in the original -- confirm against upstream i386.c.  */
9481 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
)
9483 if (pic_offset_table_rtx
9484 && !ix86_use_pseudo_pic_reg ())
9485 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
9489 rtx_insn
*insn
= get_last_insn ();
9490 rtx_insn
*deleted_debug_label
= NULL
;
9492 /* Mach-O doesn't support labels at the end of objects, so if
9493 it looks like we might want one, take special action.
9494 First, collect any sequence of deleted debug labels. */
9497 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
9499 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
9500 notes only, instead set their CODE_LABEL_NUMBER to -1,
9501 otherwise there would be code generation differences
9502 in between -g and -g0. */
9503 if (NOTE_P (insn
) && NOTE_KIND (insn
)
9504 == NOTE_INSN_DELETED_DEBUG_LABEL
)
9505 deleted_debug_label
= insn
;
9506 insn
= PREV_INSN (insn
);
9512 then this needs to be detected, so skip past the barrier. */
9514 if (insn
&& BARRIER_P (insn
))
9515 insn
= PREV_INSN (insn
);
9517 /* Up to now we've only seen notes or barriers. */
9522 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
))
9523 /* Trailing label. */
9524 fputs ("\tnop\n", file
);
9525 else if (cfun
&& ! cfun
->is_thunk
)
9527 /* See if we have a completely empty function body, skipping
9528 the special case of the picbase thunk emitted as asm. */
9529 while (insn
&& ! INSN_P (insn
))
9530 insn
= PREV_INSN (insn
);
9531 /* If we don't find any insns, we've got an empty function body;
9532 I.e. completely empty - without a return or branch. This is
9533 taken as the case where a function body has been removed
9534 because it contains an inline __builtin_unreachable(). GCC
9535 declares that reaching __builtin_unreachable() means UB so
9536 we're not obliged to do anything special; however, we want
9537 non-zero-sized function bodies. To meet this, and help the
9538 user out, let's trap the case. */
9540 fputs ("\tud2\n", file
);
9543 else if (deleted_debug_label
)
/* No nop was needed: neutralize the collected deleted debug labels so
   -g and -g0 produce identical code.  */
9544 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
9545 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
9546 CODE_LABEL_NUMBER (insn
) = -1;
9550 /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */
/* Emits the -fpatchable-function-entry NOP area via the generic helper,
   unless the function label has already been emitted -- in that case a
   pseudo UNSPECV_PATCHABLE_AREA insn has been placed at the right spot
   and nothing must be printed here (see the comment below).  */
9553 ix86_print_patchable_function_entry (FILE *file
,
9554 unsigned HOST_WIDE_INT patch_area_size
,
9557 if (cfun
->machine
->function_label_emitted
)
9559 /* NB: When ix86_print_patchable_function_entry is called after
9560 function table has been emitted, we have inserted or queued
9561 a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
9562 place. There is nothing to do here. */
9566 default_print_patchable_function_entry (file
, patch_area_size
,
9570 /* Output patchable area. NB: default_print_patchable_function_entry
9571 isn't available in i386.md. */
9574 ix86_output_patchable_area (unsigned int patch_area_size
,
9577 default_print_patchable_function_entry (asm_out_file
,
9582 /* Return a scratch register to use in the split stack prologue. The
9583 split stack prologue is used for -fsplit-stack. It is the first
9584 instructions in the function, even before the regular prologue.
9585 The scratch register can be any caller-saved register which is not
9586 used for parameters or for the static chain. */
9589 split_stack_prologue_scratch_regno (void)
9595 bool is_fastcall
, is_thiscall
;
9598 is_fastcall
= (lookup_attribute ("fastcall",
9599 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
9601 is_thiscall
= (lookup_attribute ("thiscall",
9602 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
9604 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
9608 if (DECL_STATIC_CHAIN (cfun
->decl
))
9610 sorry ("%<-fsplit-stack%> does not support fastcall with "
9612 return INVALID_REGNUM
;
9616 else if (is_thiscall
)
9618 if (!DECL_STATIC_CHAIN (cfun
->decl
))
9622 else if (regparm
< 3)
9624 if (!DECL_STATIC_CHAIN (cfun
->decl
))
9630 sorry ("%<-fsplit-stack%> does not support 2 register "
9631 "parameters for a nested function");
9632 return INVALID_REGNUM
;
9639 /* FIXME: We could make this work by pushing a register
9640 around the addition and comparison. */
9641 sorry ("%<-fsplit-stack%> does not support 3 register parameters");
9642 return INVALID_REGNUM
;
9647 /* A SYMBOL_REF for the function which allocates new stackspace for
9650 static GTY(()) rtx split_stack_fn
;
9652 /* A SYMBOL_REF for the more stack function when using the large
9655 static GTY(()) rtx split_stack_fn_large
;
9657 /* Return location of the stack guard value in the TLS block. */
9660 ix86_split_stack_guard (void)
9663 addr_space_t as
= DEFAULT_TLS_SEG_REG
;
9666 gcc_assert (flag_split_stack
);
9668 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
9669 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
9674 r
= GEN_INT (offset
);
9675 r
= gen_const_mem (Pmode
, r
);
9676 set_mem_addr_space (r
, as
);
9681 /* Handle -fsplit-stack. These are the first instructions in the
9682 function, even before the regular prologue. */
9685 ix86_expand_split_stack_prologue (void)
9687 HOST_WIDE_INT allocate
;
9688 unsigned HOST_WIDE_INT args_size
;
9689 rtx_code_label
*label
;
9690 rtx limit
, current
, allocate_rtx
, call_fusage
;
9691 rtx_insn
*call_insn
;
9692 rtx scratch_reg
= NULL_RTX
;
9693 rtx_code_label
*varargs_label
= NULL
;
9696 gcc_assert (flag_split_stack
&& reload_completed
);
9698 ix86_finalize_stack_frame_flags ();
9699 struct ix86_frame
&frame
= cfun
->machine
->frame
;
9700 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
9702 /* This is the label we will branch to if we have enough stack
9703 space. We expect the basic block reordering pass to reverse this
9704 branch if optimizing, so that we branch in the unlikely case. */
9705 label
= gen_label_rtx ();
9707 /* We need to compare the stack pointer minus the frame size with
9708 the stack boundary in the TCB. The stack boundary always gives
9709 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
9710 can compare directly. Otherwise we need to do an addition. */
9712 limit
= ix86_split_stack_guard ();
9714 if (allocate
< SPLIT_STACK_AVAILABLE
)
9715 current
= stack_pointer_rtx
;
9718 unsigned int scratch_regno
;
9721 /* We need a scratch register to hold the stack pointer minus
9722 the required frame size. Since this is the very start of the
9723 function, the scratch register can be any caller-saved
9724 register which is not used for parameters. */
9725 offset
= GEN_INT (- allocate
);
9726 scratch_regno
= split_stack_prologue_scratch_regno ();
9727 if (scratch_regno
== INVALID_REGNUM
)
9729 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
9730 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
9732 /* We don't use gen_add in this case because it will
9733 want to split to lea, but when not optimizing the insn
9734 will not be split after this point. */
9735 emit_insn (gen_rtx_SET (scratch_reg
,
9736 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
9741 emit_move_insn (scratch_reg
, offset
);
9742 emit_insn (gen_add2_insn (scratch_reg
, stack_pointer_rtx
));
9744 current
= scratch_reg
;
9747 ix86_expand_branch (GEU
, current
, limit
, label
);
9748 rtx_insn
*jump_insn
= get_last_insn ();
9749 JUMP_LABEL (jump_insn
) = label
;
9751 /* Mark the jump as very likely to be taken. */
9752 add_reg_br_prob_note (jump_insn
, profile_probability::very_likely ());
9754 if (split_stack_fn
== NULL_RTX
)
9756 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
9757 SYMBOL_REF_FLAGS (split_stack_fn
) |= SYMBOL_FLAG_LOCAL
;
9759 fn
= split_stack_fn
;
9761 /* Get more stack space. We pass in the desired stack space and the
9762 size of the arguments to copy to the new stack. In 32-bit mode
9763 we push the parameters; __morestack will return on a new stack
9764 anyhow. In 64-bit mode we pass the parameters in r10 and
9766 allocate_rtx
= GEN_INT (allocate
);
9767 args_size
= crtl
->args
.size
>= 0 ? (HOST_WIDE_INT
) crtl
->args
.size
: 0;
9768 call_fusage
= NULL_RTX
;
9774 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
9775 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
9777 /* If this function uses a static chain, it will be in %r10.
9778 Preserve it across the call to __morestack. */
9779 if (DECL_STATIC_CHAIN (cfun
->decl
))
9783 rax
= gen_rtx_REG (word_mode
, AX_REG
);
9784 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
9785 use_reg (&call_fusage
, rax
);
9788 if ((ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
9791 HOST_WIDE_INT argval
;
9793 gcc_assert (Pmode
== DImode
);
9794 /* When using the large model we need to load the address
9795 into a register, and we've run out of registers. So we
9796 switch to a different calling convention, and we call a
9797 different function: __morestack_large. We pass the
9798 argument size in the upper 32 bits of r10 and pass the
9799 frame size in the lower 32 bits. */
9800 gcc_assert ((allocate
& HOST_WIDE_INT_C (0xffffffff)) == allocate
);
9801 gcc_assert ((args_size
& 0xffffffff) == args_size
);
9803 if (split_stack_fn_large
== NULL_RTX
)
9805 split_stack_fn_large
9806 = gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
9807 SYMBOL_REF_FLAGS (split_stack_fn_large
) |= SYMBOL_FLAG_LOCAL
;
9809 if (ix86_cmodel
== CM_LARGE_PIC
)
9811 rtx_code_label
*label
;
9814 label
= gen_label_rtx ();
9816 LABEL_PRESERVE_P (label
) = 1;
9817 emit_insn (gen_set_rip_rex64 (reg10
, label
));
9818 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
9819 emit_insn (gen_add2_insn (reg10
, reg11
));
9820 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
9822 x
= gen_rtx_CONST (Pmode
, x
);
9823 emit_move_insn (reg11
, x
);
9824 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
9825 x
= gen_const_mem (Pmode
, x
);
9826 emit_move_insn (reg11
, x
);
9829 emit_move_insn (reg11
, split_stack_fn_large
);
9833 argval
= ((args_size
<< 16) << 16) + allocate
;
9834 emit_move_insn (reg10
, GEN_INT (argval
));
9838 emit_move_insn (reg10
, allocate_rtx
);
9839 emit_move_insn (reg11
, GEN_INT (args_size
));
9840 use_reg (&call_fusage
, reg11
);
9843 use_reg (&call_fusage
, reg10
);
9847 rtx_insn
*insn
= emit_insn (gen_push (GEN_INT (args_size
)));
9848 add_reg_note (insn
, REG_ARGS_SIZE
, GEN_INT (UNITS_PER_WORD
));
9849 insn
= emit_insn (gen_push (allocate_rtx
));
9850 add_reg_note (insn
, REG_ARGS_SIZE
, GEN_INT (2 * UNITS_PER_WORD
));
9851 pop
= GEN_INT (2 * UNITS_PER_WORD
);
9853 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
9854 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
9856 add_function_usage_to (call_insn
, call_fusage
);
9858 add_reg_note (call_insn
, REG_ARGS_SIZE
, GEN_INT (0));
9859 /* Indicate that this function can't jump to non-local gotos. */
9860 make_reg_eh_region_note_nothrow_nononlocal (call_insn
);
9862 /* In order to make call/return prediction work right, we now need
9863 to execute a return instruction. See
9864 libgcc/config/i386/morestack.S for the details on how this works.
9866 For flow purposes gcc must not see this as a return
9867 instruction--we need control flow to continue at the subsequent
9868 label. Therefore, we use an unspec. */
9869 gcc_assert (crtl
->args
.pops_args
< 65536);
9871 = emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
9873 if ((flag_cf_protection
& CF_BRANCH
))
9875 /* Insert ENDBR since __morestack will jump back here via indirect
9877 rtx cet_eb
= gen_nop_endbr ();
9878 emit_insn_after (cet_eb
, ret_insn
);
9881 /* If we are in 64-bit mode and this function uses a static chain,
9882 we saved %r10 in %rax before calling _morestack. */
9883 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
9884 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
9885 gen_rtx_REG (word_mode
, AX_REG
));
9887 /* If this function calls va_start, we need to store a pointer to
9888 the arguments on the old stack, because they may not have been
9889 all copied to the new stack. At this point the old stack can be
9890 found at the frame pointer value used by __morestack, because
9891 __morestack has set that up before calling back to us. Here we
9892 store that pointer in a scratch register, and in
9893 ix86_expand_prologue we store the scratch register in a stack
9895 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
9897 unsigned int scratch_regno
;
9901 scratch_regno
= split_stack_prologue_scratch_regno ();
9902 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
9903 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
9907 return address within this function
9908 return address of caller of this function
9910 So we add three words to get to the stack arguments.
9914 return address within this function
9915 first argument to __morestack
9916 second argument to __morestack
9917 return address of caller of this function
9919 So we add five words to get to the stack arguments.
9921 words
= TARGET_64BIT
? 3 : 5;
9922 emit_insn (gen_rtx_SET (scratch_reg
,
9923 plus_constant (Pmode
, frame_reg
,
9924 words
* UNITS_PER_WORD
)));
9926 varargs_label
= gen_label_rtx ();
9927 emit_jump_insn (gen_jump (varargs_label
));
9928 JUMP_LABEL (get_last_insn ()) = varargs_label
;
9934 LABEL_NUSES (label
) = 1;
9936 /* If this function calls va_start, we now have to set the scratch
9937 register for the case where we do not call __morestack. In this
9938 case we need to set it based on the stack pointer. */
9939 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
9941 emit_insn (gen_rtx_SET (scratch_reg
,
9942 plus_constant (Pmode
, stack_pointer_rtx
,
9945 emit_label (varargs_label
);
9946 LABEL_NUSES (varargs_label
) = 1;
9950 /* We may have to tell the dataflow pass that the split stack prologue
9951 is initializing a scratch register. */
9954 ix86_live_on_entry (bitmap regs
)
9956 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
9958 gcc_assert (flag_split_stack
);
9959 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
9963 /* Extract the parts of an RTL expression that is a valid memory address
9964 for an instruction. Return 0 if the structure of the address is
9965 grossly off. Return -1 if the address contains ASHIFT, so it is not
9966 strictly valid, but still used for computing length of lea instruction. */
9969 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
9971 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
9972 rtx base_reg
, index_reg
;
9973 HOST_WIDE_INT scale
= 1;
9974 rtx scale_rtx
= NULL_RTX
;
9977 addr_space_t seg
= ADDR_SPACE_GENERIC
;
9979 /* Allow zero-extended SImode addresses,
9980 they will be emitted with addr32 prefix. */
9981 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
9983 if (GET_CODE (addr
) == ZERO_EXTEND
9984 && GET_MODE (XEXP (addr
, 0)) == SImode
)
9986 addr
= XEXP (addr
, 0);
9987 if (CONST_INT_P (addr
))
9990 else if (GET_CODE (addr
) == AND
9991 && const_32bit_mask (XEXP (addr
, 1), DImode
))
9993 addr
= lowpart_subreg (SImode
, XEXP (addr
, 0), DImode
);
9994 if (addr
== NULL_RTX
)
9997 if (CONST_INT_P (addr
))
10002 /* Allow SImode subregs of DImode addresses,
10003 they will be emitted with addr32 prefix. */
10004 if (TARGET_64BIT
&& GET_MODE (addr
) == SImode
)
10006 if (SUBREG_P (addr
)
10007 && GET_MODE (SUBREG_REG (addr
)) == DImode
)
10009 addr
= SUBREG_REG (addr
);
10010 if (CONST_INT_P (addr
))
10017 else if (SUBREG_P (addr
))
10019 if (REG_P (SUBREG_REG (addr
)))
10024 else if (GET_CODE (addr
) == PLUS
)
10026 rtx addends
[4], op
;
10034 addends
[n
++] = XEXP (op
, 1);
10037 while (GET_CODE (op
) == PLUS
);
10042 for (i
= n
; i
>= 0; --i
)
10045 switch (GET_CODE (op
))
10050 index
= XEXP (op
, 0);
10051 scale_rtx
= XEXP (op
, 1);
10057 index
= XEXP (op
, 0);
10058 tmp
= XEXP (op
, 1);
10059 if (!CONST_INT_P (tmp
))
10061 scale
= INTVAL (tmp
);
10062 if ((unsigned HOST_WIDE_INT
) scale
> 3)
10064 scale
= 1 << scale
;
10069 if (GET_CODE (op
) != UNSPEC
)
10074 if (XINT (op
, 1) == UNSPEC_TP
10075 && TARGET_TLS_DIRECT_SEG_REFS
10076 && seg
== ADDR_SPACE_GENERIC
)
10077 seg
= DEFAULT_TLS_SEG_REG
;
10083 if (!REG_P (SUBREG_REG (op
)))
10110 else if (GET_CODE (addr
) == MULT
)
10112 index
= XEXP (addr
, 0); /* index*scale */
10113 scale_rtx
= XEXP (addr
, 1);
10115 else if (GET_CODE (addr
) == ASHIFT
)
10117 /* We're called for lea too, which implements ashift on occasion. */
10118 index
= XEXP (addr
, 0);
10119 tmp
= XEXP (addr
, 1);
10120 if (!CONST_INT_P (tmp
))
10122 scale
= INTVAL (tmp
);
10123 if ((unsigned HOST_WIDE_INT
) scale
> 3)
10125 scale
= 1 << scale
;
10129 disp
= addr
; /* displacement */
10135 else if (SUBREG_P (index
)
10136 && REG_P (SUBREG_REG (index
)))
10142 /* Extract the integral value of scale. */
10145 if (!CONST_INT_P (scale_rtx
))
10147 scale
= INTVAL (scale_rtx
);
10150 base_reg
= base
&& SUBREG_P (base
) ? SUBREG_REG (base
) : base
;
10151 index_reg
= index
&& SUBREG_P (index
) ? SUBREG_REG (index
) : index
;
10153 /* Avoid useless 0 displacement. */
10154 if (disp
== const0_rtx
&& (base
|| index
))
10157 /* Allow arg pointer and stack pointer as index if there is not scaling. */
10158 if (base_reg
&& index_reg
&& scale
== 1
10159 && (REGNO (index_reg
) == ARG_POINTER_REGNUM
10160 || REGNO (index_reg
) == FRAME_POINTER_REGNUM
10161 || REGNO (index_reg
) == SP_REG
))
10163 std::swap (base
, index
);
10164 std::swap (base_reg
, index_reg
);
10167 /* Special case: %ebp cannot be encoded as a base without a displacement.
10169 if (!disp
&& base_reg
10170 && (REGNO (base_reg
) == ARG_POINTER_REGNUM
10171 || REGNO (base_reg
) == FRAME_POINTER_REGNUM
10172 || REGNO (base_reg
) == BP_REG
10173 || REGNO (base_reg
) == R13_REG
))
10176 /* Special case: on K6, [%esi] makes the instruction vector decoded.
10177 Avoid this by transforming to [%esi+0].
10178 Reload calls address legitimization without cfun defined, so we need
10179 to test cfun for being non-NULL. */
10180 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
10181 && base_reg
&& !index_reg
&& !disp
10182 && REGNO (base_reg
) == SI_REG
)
10185 /* Special case: encode reg+reg instead of reg*2. */
10186 if (!base
&& index
&& scale
== 2)
10187 base
= index
, base_reg
= index_reg
, scale
= 1;
10189 /* Special case: scaling cannot be encoded without base or displacement. */
10190 if (!base
&& !disp
&& index
&& scale
!= 1)
10194 out
->index
= index
;
10196 out
->scale
= scale
;
10202 /* Return cost of the memory address x.
10203 For i386, it is better to use a complex address than let gcc copy
10204 the address into a reg and make a new pseudo. But not if the address
10205 requires to two regs - that would mean more pseudos with longer
10208 ix86_address_cost (rtx x
, machine_mode
, addr_space_t
, bool)
10210 struct ix86_address parts
;
10212 int ok
= ix86_decompose_address (x
, &parts
);
10216 if (parts
.base
&& SUBREG_P (parts
.base
))
10217 parts
.base
= SUBREG_REG (parts
.base
);
10218 if (parts
.index
&& SUBREG_P (parts
.index
))
10219 parts
.index
= SUBREG_REG (parts
.index
);
10221 /* Attempt to minimize number of registers in the address by increasing
10222 address cost for each used register. We don't increase address cost
10223 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
10224 is not invariant itself it most likely means that base or index is not
10225 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
10226 which is not profitable for x86. */
10228 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
10229 && (current_pass
->type
== GIMPLE_PASS
10230 || !pic_offset_table_rtx
10231 || !REG_P (parts
.base
)
10232 || REGNO (pic_offset_table_rtx
) != REGNO (parts
.base
)))
10236 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
10237 && (current_pass
->type
== GIMPLE_PASS
10238 || !pic_offset_table_rtx
10239 || !REG_P (parts
.index
)
10240 || REGNO (pic_offset_table_rtx
) != REGNO (parts
.index
)))
10243 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
10244 since it's predecode logic can't detect the length of instructions
10245 and it degenerates to vector decoded. Increase cost of such
10246 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
10247 to split such addresses or even refuse such addresses at all.
10249 Following addressing modes are affected:
10254 The first and last case may be avoidable by explicitly coding the zero in
10255 memory address, but I don't have AMD-K6 machine handy to check this
10259 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
10260 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
10261 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
10267 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10268 this is used for to form addresses to local data when -fPIC is in
10272 darwin_local_data_pic (rtx disp
)
10274 return (GET_CODE (disp
) == UNSPEC
10275 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
10278 /* True if operand X should be loaded from GOT. */
10281 ix86_force_load_from_GOT_p (rtx x
)
10283 return ((TARGET_64BIT
|| HAVE_AS_IX86_GOT32X
)
10284 && !TARGET_PECOFF
&& !TARGET_MACHO
10286 && ix86_cmodel
!= CM_LARGE
10287 && GET_CODE (x
) == SYMBOL_REF
10288 && SYMBOL_REF_FUNCTION_P (x
)
10290 || (SYMBOL_REF_DECL (x
)
10291 && lookup_attribute ("noplt",
10292 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x
)))))
10293 && !SYMBOL_REF_LOCAL_P (x
));
10296 /* Determine if a given RTX is a valid constant. We already know this
10297 satisfies CONSTANT_P. */
10300 ix86_legitimate_constant_p (machine_mode mode
, rtx x
)
10302 switch (GET_CODE (x
))
10307 if (GET_CODE (x
) == PLUS
)
10309 if (!CONST_INT_P (XEXP (x
, 1)))
10314 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
10317 /* Only some unspecs are valid as "constants". */
10318 if (GET_CODE (x
) == UNSPEC
)
10319 switch (XINT (x
, 1))
10322 case UNSPEC_GOTOFF
:
10323 case UNSPEC_PLTOFF
:
10324 return TARGET_64BIT
;
10326 case UNSPEC_NTPOFF
:
10327 x
= XVECEXP (x
, 0, 0);
10328 return (GET_CODE (x
) == SYMBOL_REF
10329 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
10330 case UNSPEC_DTPOFF
:
10331 x
= XVECEXP (x
, 0, 0);
10332 return (GET_CODE (x
) == SYMBOL_REF
10333 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
10338 /* We must have drilled down to a symbol. */
10339 if (GET_CODE (x
) == LABEL_REF
)
10341 if (GET_CODE (x
) != SYMBOL_REF
)
10346 /* TLS symbols are never valid. */
10347 if (SYMBOL_REF_TLS_MODEL (x
))
10350 /* DLLIMPORT symbols are never valid. */
10351 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10352 && SYMBOL_REF_DLLIMPORT_P (x
))
10356 /* mdynamic-no-pic */
10357 if (MACHO_DYNAMIC_NO_PIC_P
)
10358 return machopic_symbol_defined_p (x
);
10361 /* External function address should be loaded
10362 via the GOT slot to avoid PLT. */
10363 if (ix86_force_load_from_GOT_p (x
))
10368 CASE_CONST_SCALAR_INT
:
10369 if (ix86_endbr_immediate_operand (x
, VOIDmode
))
10380 if (!standard_sse_constant_p (x
, mode
))
10388 if (!standard_sse_constant_p (x
, mode
))
10395 /* Otherwise we handle everything else in the move patterns. */
10399 /* Determine if it's legal to put X into the constant pool. This
10400 is not possible for the address of thread-local symbols, which
10401 is checked above. */
10404 ix86_cannot_force_const_mem (machine_mode mode
, rtx x
)
10406 /* We can put any immediate constant in memory. */
10407 switch (GET_CODE (x
))
10416 return !ix86_legitimate_constant_p (mode
, x
);
10419 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
10423 is_imported_p (rtx x
)
10425 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
10426 || GET_CODE (x
) != SYMBOL_REF
)
10429 return SYMBOL_REF_DLLIMPORT_P (x
) || SYMBOL_REF_STUBVAR_P (x
);
10433 /* Nonzero if the constant value X is a legitimate general operand
10434 when generating PIC code. It is given that flag_pic is on and
10435 that X satisfies CONSTANT_P. */
10438 legitimate_pic_operand_p (rtx x
)
10442 switch (GET_CODE (x
))
10445 inner
= XEXP (x
, 0);
10446 if (GET_CODE (inner
) == PLUS
10447 && CONST_INT_P (XEXP (inner
, 1)))
10448 inner
= XEXP (inner
, 0);
10450 /* Only some unspecs are valid as "constants". */
10451 if (GET_CODE (inner
) == UNSPEC
)
10452 switch (XINT (inner
, 1))
10455 case UNSPEC_GOTOFF
:
10456 case UNSPEC_PLTOFF
:
10457 return TARGET_64BIT
;
10459 x
= XVECEXP (inner
, 0, 0);
10460 return (GET_CODE (x
) == SYMBOL_REF
10461 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
10462 case UNSPEC_MACHOPIC_OFFSET
:
10463 return legitimate_pic_address_disp_p (x
);
10471 return legitimate_pic_address_disp_p (x
);
10478 /* Determine if a given CONST RTX is a valid memory displacement
10482 legitimate_pic_address_disp_p (rtx disp
)
10486 /* In 64bit mode we can allow direct addresses of symbols and labels
10487 when they are not dynamic symbols. */
10490 rtx op0
= disp
, op1
;
10492 switch (GET_CODE (disp
))
10498 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
10500 op0
= XEXP (XEXP (disp
, 0), 0);
10501 op1
= XEXP (XEXP (disp
, 0), 1);
10502 if (!CONST_INT_P (op1
))
10504 if (GET_CODE (op0
) == UNSPEC
10505 && (XINT (op0
, 1) == UNSPEC_DTPOFF
10506 || XINT (op0
, 1) == UNSPEC_NTPOFF
)
10507 && trunc_int_for_mode (INTVAL (op1
), SImode
) == INTVAL (op1
))
10509 if (INTVAL (op1
) >= 16*1024*1024
10510 || INTVAL (op1
) < -16*1024*1024)
10512 if (GET_CODE (op0
) == LABEL_REF
)
10514 if (GET_CODE (op0
) == CONST
10515 && GET_CODE (XEXP (op0
, 0)) == UNSPEC
10516 && XINT (XEXP (op0
, 0), 1) == UNSPEC_PCREL
)
10518 if (GET_CODE (op0
) == UNSPEC
10519 && XINT (op0
, 1) == UNSPEC_PCREL
)
10521 if (GET_CODE (op0
) != SYMBOL_REF
)
10526 /* TLS references should always be enclosed in UNSPEC.
10527 The dllimported symbol needs always to be resolved. */
10528 if (SYMBOL_REF_TLS_MODEL (op0
)
10529 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& SYMBOL_REF_DLLIMPORT_P (op0
)))
10534 if (is_imported_p (op0
))
10537 if (SYMBOL_REF_FAR_ADDR_P (op0
)
10538 || !SYMBOL_REF_LOCAL_P (op0
))
10541 /* Function-symbols need to be resolved only for
10543 For the small-model we don't need to resolve anything
10545 if ((ix86_cmodel
!= CM_LARGE_PIC
10546 && SYMBOL_REF_FUNCTION_P (op0
))
10547 || ix86_cmodel
== CM_SMALL_PIC
)
10549 /* Non-external symbols don't need to be resolved for
10550 large, and medium-model. */
10551 if ((ix86_cmodel
== CM_LARGE_PIC
10552 || ix86_cmodel
== CM_MEDIUM_PIC
)
10553 && !SYMBOL_REF_EXTERNAL_P (op0
))
10556 else if (!SYMBOL_REF_FAR_ADDR_P (op0
)
10557 && (SYMBOL_REF_LOCAL_P (op0
)
10558 || (HAVE_LD_PIE_COPYRELOC
10560 && !SYMBOL_REF_WEAK (op0
)
10561 && !SYMBOL_REF_FUNCTION_P (op0
)))
10562 && ix86_cmodel
!= CM_LARGE_PIC
)
10570 if (GET_CODE (disp
) != CONST
)
10572 disp
= XEXP (disp
, 0);
10576 /* We are unsafe to allow PLUS expressions. This limit allowed distance
10577 of GOT tables. We should not need these anyway. */
10578 if (GET_CODE (disp
) != UNSPEC
10579 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
10580 && XINT (disp
, 1) != UNSPEC_GOTOFF
10581 && XINT (disp
, 1) != UNSPEC_PCREL
10582 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
10585 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
10586 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
10592 if (GET_CODE (disp
) == PLUS
)
10594 if (!CONST_INT_P (XEXP (disp
, 1)))
10596 disp
= XEXP (disp
, 0);
10600 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
10603 if (GET_CODE (disp
) != UNSPEC
)
10606 switch (XINT (disp
, 1))
10611 /* We need to check for both symbols and labels because VxWorks loads
10612 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10614 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
10615 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
10616 case UNSPEC_GOTOFF
:
10617 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10618 While ABI specify also 32bit relocation but we don't produce it in
10619 small PIC model at all. */
10620 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
10621 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
10623 return !TARGET_PECOFF
&& gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
10625 case UNSPEC_GOTTPOFF
:
10626 case UNSPEC_GOTNTPOFF
:
10627 case UNSPEC_INDNTPOFF
:
10630 disp
= XVECEXP (disp
, 0, 0);
10631 return (GET_CODE (disp
) == SYMBOL_REF
10632 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
10633 case UNSPEC_NTPOFF
:
10634 disp
= XVECEXP (disp
, 0, 0);
10635 return (GET_CODE (disp
) == SYMBOL_REF
10636 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
10637 case UNSPEC_DTPOFF
:
10638 disp
= XVECEXP (disp
, 0, 0);
10639 return (GET_CODE (disp
) == SYMBOL_REF
10640 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
10646 /* Determine if op is suitable RTX for an address register.
10647 Return naked register if a register or a register subreg is
10648 found, otherwise return NULL_RTX. */
10651 ix86_validate_address_register (rtx op
)
10653 machine_mode mode
= GET_MODE (op
);
10655 /* Only SImode or DImode registers can form the address. */
10656 if (mode
!= SImode
&& mode
!= DImode
)
10661 else if (SUBREG_P (op
))
10663 rtx reg
= SUBREG_REG (op
);
10668 mode
= GET_MODE (reg
);
10670 /* Don't allow SUBREGs that span more than a word. It can
10671 lead to spill failures when the register is one word out
10672 of a two word structure. */
10673 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
10676 /* Allow only SUBREGs of non-eliminable hard registers. */
10677 if (register_no_elim_operand (reg
, mode
))
10681 /* Op is not a register. */
10685 /* Recognizes RTL expressions that are valid memory addresses for an
10686 instruction. The MODE argument is the machine mode for the MEM
10687 expression that wants to use this address.
10689 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
10690 convert common non-canonical forms to canonical form so that they will
10694 ix86_legitimate_address_p (machine_mode
, rtx addr
, bool strict
)
10696 struct ix86_address parts
;
10697 rtx base
, index
, disp
;
10698 HOST_WIDE_INT scale
;
10701 if (ix86_decompose_address (addr
, &parts
) <= 0)
10702 /* Decomposition failed. */
10706 index
= parts
.index
;
10708 scale
= parts
.scale
;
10711 /* Validate base register. */
10714 rtx reg
= ix86_validate_address_register (base
);
10716 if (reg
== NULL_RTX
)
10719 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
10720 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
10721 /* Base is not valid. */
10725 /* Validate index register. */
10728 rtx reg
= ix86_validate_address_register (index
);
10730 if (reg
== NULL_RTX
)
10733 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
10734 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
10735 /* Index is not valid. */
10739 /* Index and base should have the same mode. */
10741 && GET_MODE (base
) != GET_MODE (index
))
10744 /* Address override works only on the (%reg) part of %fs:(%reg). */
10745 if (seg
!= ADDR_SPACE_GENERIC
10746 && ((base
&& GET_MODE (base
) != word_mode
)
10747 || (index
&& GET_MODE (index
) != word_mode
)))
10750 /* Validate scale factor. */
10754 /* Scale without index. */
10757 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
10758 /* Scale is not a valid multiplier. */
10762 /* Validate displacement. */
10765 if (ix86_endbr_immediate_operand (disp
, VOIDmode
))
10768 if (GET_CODE (disp
) == CONST
10769 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
10770 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
10771 switch (XINT (XEXP (disp
, 0), 1))
10773 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
10774 when used. While ABI specify also 32bit relocations, we
10775 don't produce them at all and use IP relative instead.
10776 Allow GOT in 32bit mode for both PIC and non-PIC if symbol
10777 should be loaded via GOT. */
10780 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp
, 0), 0, 0)))
10781 goto is_legitimate_pic
;
10783 case UNSPEC_GOTOFF
:
10784 gcc_assert (flag_pic
);
10786 goto is_legitimate_pic
;
10788 /* 64bit address unspec. */
10791 case UNSPEC_GOTPCREL
:
10792 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp
, 0), 0, 0)))
10793 goto is_legitimate_pic
;
10796 gcc_assert (flag_pic
);
10797 goto is_legitimate_pic
;
10799 case UNSPEC_GOTTPOFF
:
10800 case UNSPEC_GOTNTPOFF
:
10801 case UNSPEC_INDNTPOFF
:
10802 case UNSPEC_NTPOFF
:
10803 case UNSPEC_DTPOFF
:
10807 /* Invalid address unspec. */
10811 else if (SYMBOLIC_CONST (disp
)
10815 && MACHOPIC_INDIRECT
10816 && !machopic_operand_p (disp
)
10822 if (TARGET_64BIT
&& (index
|| base
))
10824 /* foo@dtpoff(%rX) is ok. */
10825 if (GET_CODE (disp
) != CONST
10826 || GET_CODE (XEXP (disp
, 0)) != PLUS
10827 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
10828 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
10829 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
10830 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
10831 /* Non-constant pic memory reference. */
10834 else if ((!TARGET_MACHO
|| flag_pic
)
10835 && ! legitimate_pic_address_disp_p (disp
))
10836 /* Displacement is an invalid pic construct. */
10839 else if (MACHO_DYNAMIC_NO_PIC_P
10840 && !ix86_legitimate_constant_p (Pmode
, disp
))
10841 /* displacment must be referenced via non_lazy_pointer */
10845 /* This code used to verify that a symbolic pic displacement
10846 includes the pic_offset_table_rtx register.
10848 While this is good idea, unfortunately these constructs may
10849 be created by "adds using lea" optimization for incorrect
10858 This code is nonsensical, but results in addressing
10859 GOT table with pic_offset_table_rtx base. We can't
10860 just refuse it easily, since it gets matched by
10861 "addsi3" pattern, that later gets split to lea in the
10862 case output register differs from input. While this
10863 can be handled by separate addsi pattern for this case
10864 that never results in lea, this seems to be easier and
10865 correct fix for crash to disable this test. */
10867 else if (GET_CODE (disp
) != LABEL_REF
10868 && !CONST_INT_P (disp
)
10869 && (GET_CODE (disp
) != CONST
10870 || !ix86_legitimate_constant_p (Pmode
, disp
))
10871 && (GET_CODE (disp
) != SYMBOL_REF
10872 || !ix86_legitimate_constant_p (Pmode
, disp
)))
10873 /* Displacement is not constant. */
10875 else if (TARGET_64BIT
10876 && !x86_64_immediate_operand (disp
, VOIDmode
))
10877 /* Displacement is out of range. */
10879 /* In x32 mode, constant addresses are sign extended to 64bit, so
10880 we have to prevent addresses from 0x80000000 to 0xffffffff. */
10881 else if (TARGET_X32
&& !(index
|| base
)
10882 && CONST_INT_P (disp
)
10883 && val_signbit_known_set_p (SImode
, INTVAL (disp
)))
10887 /* Everything looks valid. */
10891 /* Determine if a given RTX is a valid constant address. */
10894 constant_address_p (rtx x
)
10896 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
10899 /* Return a unique alias set for the GOT. */
10902 ix86_GOT_alias_set (void)
10904 static alias_set_type set
= -1;
10906 set
= new_alias_set ();
10910 /* Return a legitimate reference for ORIG (an address) using the
10911 register REG. If REG is 0, a new pseudo is generated.
10913 There are two types of references that must be handled:
10915 1. Global data references must load the address from the GOT, via
10916 the PIC reg. An insn is emitted to do this load, and the reg is
10919 2. Static data references, constant pool addresses, and code labels
10920 compute the address as an offset from the GOT, whose base is in
10921 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
10922 differentiate them from global data objects. The returned
10923 address is the PIC reg + an unspec constant.
10925 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
10926 reg also appears in the address. */
10929 legitimize_pic_address (rtx orig
, rtx reg
)
10932 rtx new_rtx
= orig
;
10935 if (TARGET_MACHO
&& !TARGET_64BIT
)
10938 reg
= gen_reg_rtx (Pmode
);
10939 /* Use the generic Mach-O PIC machinery. */
10940 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
10944 if (TARGET_64BIT
&& TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
10946 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
10951 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
10953 else if ((!TARGET_64BIT
10954 || /* TARGET_64BIT && */ ix86_cmodel
!= CM_SMALL_PIC
)
10956 && gotoff_operand (addr
, Pmode
))
10958 /* This symbol may be referenced via a displacement
10959 from the PIC base address (@GOTOFF). */
10960 if (GET_CODE (addr
) == CONST
)
10961 addr
= XEXP (addr
, 0);
10963 if (GET_CODE (addr
) == PLUS
)
10965 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
10967 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
10970 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
10972 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
10975 new_rtx
= copy_to_suggested_reg (new_rtx
, reg
, Pmode
);
10979 gcc_assert (REG_P (reg
));
10980 new_rtx
= expand_simple_binop (Pmode
, PLUS
, pic_offset_table_rtx
,
10981 new_rtx
, reg
, 1, OPTAB_DIRECT
);
10984 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
10986 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
10987 /* We can't use @GOTOFF for text labels
10988 on VxWorks, see gotoff_operand. */
10989 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
10991 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
10995 /* For x64 PE-COFF there is no GOT table,
10996 so we use address directly. */
10997 if (TARGET_64BIT
&& TARGET_PECOFF
)
10999 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
11000 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
11002 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
11004 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
),
11006 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
11007 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
11008 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
11012 /* This symbol must be referenced via a load
11013 from the Global Offset Table (@GOT). */
11014 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
11015 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
11017 new_rtx
= force_reg (Pmode
, new_rtx
);
11018 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
11019 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
11020 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
11023 new_rtx
= copy_to_suggested_reg (new_rtx
, reg
, Pmode
);
11027 if (CONST_INT_P (addr
)
11028 && !x86_64_immediate_operand (addr
, VOIDmode
))
11029 new_rtx
= copy_to_suggested_reg (addr
, reg
, Pmode
);
11030 else if (GET_CODE (addr
) == CONST
)
11032 addr
= XEXP (addr
, 0);
11034 /* We must match stuff we generate before. Assume the only
11035 unspecs that can get here are ours. Not that we could do
11036 anything with them anyway.... */
11037 if (GET_CODE (addr
) == UNSPEC
11038 || (GET_CODE (addr
) == PLUS
11039 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
11041 gcc_assert (GET_CODE (addr
) == PLUS
);
11044 if (GET_CODE (addr
) == PLUS
)
11046 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
11048 /* Check first to see if this is a constant
11049 offset from a @GOTOFF symbol reference. */
11051 && gotoff_operand (op0
, Pmode
)
11052 && CONST_INT_P (op1
))
11056 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
11058 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
11059 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
11063 gcc_assert (REG_P (reg
));
11064 new_rtx
= expand_simple_binop (Pmode
, PLUS
,
11065 pic_offset_table_rtx
,
11071 = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
11075 if (INTVAL (op1
) < -16*1024*1024
11076 || INTVAL (op1
) >= 16*1024*1024)
11078 if (!x86_64_immediate_operand (op1
, Pmode
))
11079 op1
= force_reg (Pmode
, op1
);
11082 = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
11088 rtx base
= legitimize_pic_address (op0
, reg
);
11089 machine_mode mode
= GET_MODE (base
);
11091 = legitimize_pic_address (op1
, base
== reg
? NULL_RTX
: reg
);
11093 if (CONST_INT_P (new_rtx
))
11095 if (INTVAL (new_rtx
) < -16*1024*1024
11096 || INTVAL (new_rtx
) >= 16*1024*1024)
11098 if (!x86_64_immediate_operand (new_rtx
, mode
))
11099 new_rtx
= force_reg (mode
, new_rtx
);
11102 = gen_rtx_PLUS (mode
, force_reg (mode
, base
), new_rtx
);
11105 new_rtx
= plus_constant (mode
, base
, INTVAL (new_rtx
));
11109 /* For %rip addressing, we have to use
11110 just disp32, not base nor index. */
11112 && (GET_CODE (base
) == SYMBOL_REF
11113 || GET_CODE (base
) == LABEL_REF
))
11114 base
= force_reg (mode
, base
);
11115 if (GET_CODE (new_rtx
) == PLUS
11116 && CONSTANT_P (XEXP (new_rtx
, 1)))
11118 base
= gen_rtx_PLUS (mode
, base
, XEXP (new_rtx
, 0));
11119 new_rtx
= XEXP (new_rtx
, 1);
11121 new_rtx
= gen_rtx_PLUS (mode
, base
, new_rtx
);
11129 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11132 get_thread_pointer (machine_mode tp_mode
, bool to_reg
)
11134 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
11136 if (GET_MODE (tp
) != tp_mode
)
11138 gcc_assert (GET_MODE (tp
) == SImode
);
11139 gcc_assert (tp_mode
== DImode
);
11141 tp
= gen_rtx_ZERO_EXTEND (tp_mode
, tp
);
11145 tp
= copy_to_mode_reg (tp_mode
, tp
);
11150 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11152 static GTY(()) rtx ix86_tls_symbol
;
11155 ix86_tls_get_addr (void)
11157 if (!ix86_tls_symbol
)
11160 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
11161 ? "___tls_get_addr" : "__tls_get_addr");
11163 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
11166 if (ix86_cmodel
== CM_LARGE_PIC
&& !TARGET_PECOFF
)
11168 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, ix86_tls_symbol
),
11170 return gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
,
11171 gen_rtx_CONST (Pmode
, unspec
));
11174 return ix86_tls_symbol
;
11177 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
11179 static GTY(()) rtx ix86_tls_module_base_symbol
;
11182 ix86_tls_module_base (void)
11184 if (!ix86_tls_module_base_symbol
)
11186 ix86_tls_module_base_symbol
11187 = gen_rtx_SYMBOL_REF (ptr_mode
, "_TLS_MODULE_BASE_");
11189 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
11190 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
11193 return ix86_tls_module_base_symbol
;
11196 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11197 false if we expect this to be used for a memory address and true if
11198 we expect to load the address into a register. */
11201 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
11203 rtx dest
, base
, off
;
11204 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
11205 machine_mode tp_mode
= Pmode
;
11208 /* Fall back to global dynamic model if tool chain cannot support local
11210 if (TARGET_SUN_TLS
&& !TARGET_64BIT
11211 && !HAVE_AS_IX86_TLSLDMPLT
&& !HAVE_AS_IX86_TLSLDM
11212 && model
== TLS_MODEL_LOCAL_DYNAMIC
)
11213 model
= TLS_MODEL_GLOBAL_DYNAMIC
;
11217 case TLS_MODEL_GLOBAL_DYNAMIC
:
11220 if (flag_pic
&& !TARGET_PECOFF
)
11221 pic
= pic_offset_table_rtx
;
11224 pic
= gen_reg_rtx (Pmode
);
11225 emit_insn (gen_set_got (pic
));
11229 if (TARGET_GNU2_TLS
)
11231 dest
= gen_reg_rtx (ptr_mode
);
11233 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode
, dest
, x
));
11235 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
11237 tp
= get_thread_pointer (ptr_mode
, true);
11238 dest
= gen_rtx_PLUS (ptr_mode
, tp
, dest
);
11239 if (GET_MODE (dest
) != Pmode
)
11240 dest
= gen_rtx_ZERO_EXTEND (Pmode
, dest
);
11241 dest
= force_reg (Pmode
, dest
);
11243 if (GET_MODE (x
) != Pmode
)
11244 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
11246 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
11250 rtx caddr
= ix86_tls_get_addr ();
11252 dest
= gen_reg_rtx (Pmode
);
11255 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
11260 (gen_tls_global_dynamic_64 (Pmode
, rax
, x
, caddr
));
11261 insns
= get_insns ();
11264 if (GET_MODE (x
) != Pmode
)
11265 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
11267 RTL_CONST_CALL_P (insns
) = 1;
11268 emit_libcall_block (insns
, dest
, rax
, x
);
11271 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
11275 case TLS_MODEL_LOCAL_DYNAMIC
:
11279 pic
= pic_offset_table_rtx
;
11282 pic
= gen_reg_rtx (Pmode
);
11283 emit_insn (gen_set_got (pic
));
11287 if (TARGET_GNU2_TLS
)
11289 rtx tmp
= ix86_tls_module_base ();
11291 base
= gen_reg_rtx (ptr_mode
);
11293 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode
, base
, tmp
));
11295 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
11297 tp
= get_thread_pointer (ptr_mode
, true);
11298 if (GET_MODE (base
) != Pmode
)
11299 base
= gen_rtx_ZERO_EXTEND (Pmode
, base
);
11300 base
= force_reg (Pmode
, base
);
11304 rtx caddr
= ix86_tls_get_addr ();
11306 base
= gen_reg_rtx (Pmode
);
11309 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
11315 (gen_tls_local_dynamic_base_64 (Pmode
, rax
, caddr
));
11316 insns
= get_insns ();
11319 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
11320 share the LD_BASE result with other LD model accesses. */
11321 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
11322 UNSPEC_TLS_LD_BASE
);
11324 RTL_CONST_CALL_P (insns
) = 1;
11325 emit_libcall_block (insns
, base
, rax
, eqv
);
11328 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
11331 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
11332 off
= gen_rtx_CONST (Pmode
, off
);
11334 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
11336 if (TARGET_GNU2_TLS
)
11338 if (GET_MODE (tp
) != Pmode
)
11340 dest
= lowpart_subreg (ptr_mode
, dest
, Pmode
);
11341 dest
= gen_rtx_PLUS (ptr_mode
, tp
, dest
);
11342 dest
= gen_rtx_ZERO_EXTEND (Pmode
, dest
);
11345 dest
= gen_rtx_PLUS (Pmode
, tp
, dest
);
11346 dest
= force_reg (Pmode
, dest
);
11348 if (GET_MODE (x
) != Pmode
)
11349 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
11351 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
11355 case TLS_MODEL_INITIAL_EXEC
:
11358 if (TARGET_SUN_TLS
&& !TARGET_X32
)
11360 /* The Sun linker took the AMD64 TLS spec literally
11361 and can only handle %rax as destination of the
11362 initial executable code sequence. */
11364 dest
= gen_reg_rtx (DImode
);
11365 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
11369 /* Generate DImode references to avoid %fs:(%reg32)
11370 problems and linker IE->LE relaxation bug. */
11373 type
= UNSPEC_GOTNTPOFF
;
11377 pic
= pic_offset_table_rtx
;
11378 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
11380 else if (!TARGET_ANY_GNU_TLS
)
11382 pic
= gen_reg_rtx (Pmode
);
11383 emit_insn (gen_set_got (pic
));
11384 type
= UNSPEC_GOTTPOFF
;
11389 type
= UNSPEC_INDNTPOFF
;
11392 off
= gen_rtx_UNSPEC (tp_mode
, gen_rtvec (1, x
), type
);
11393 off
= gen_rtx_CONST (tp_mode
, off
);
11395 off
= gen_rtx_PLUS (tp_mode
, pic
, off
);
11396 off
= gen_const_mem (tp_mode
, off
);
11397 set_mem_alias_set (off
, ix86_GOT_alias_set ());
11399 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
11401 base
= get_thread_pointer (tp_mode
,
11402 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
11403 off
= force_reg (tp_mode
, off
);
11404 dest
= gen_rtx_PLUS (tp_mode
, base
, off
);
11405 if (tp_mode
!= Pmode
)
11406 dest
= convert_to_mode (Pmode
, dest
, 1);
11410 base
= get_thread_pointer (Pmode
, true);
11411 dest
= gen_reg_rtx (Pmode
);
11412 emit_insn (gen_sub3_insn (dest
, base
, off
));
11416 case TLS_MODEL_LOCAL_EXEC
:
11417 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
11418 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
11419 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
11420 off
= gen_rtx_CONST (Pmode
, off
);
11422 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
11424 base
= get_thread_pointer (Pmode
,
11425 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
11426 return gen_rtx_PLUS (Pmode
, base
, off
);
11430 base
= get_thread_pointer (Pmode
, true);
11431 dest
= gen_reg_rtx (Pmode
);
11432 emit_insn (gen_sub3_insn (dest
, base
, off
));
11437 gcc_unreachable ();
11443 /* Return true if OP refers to a TLS address. */
11445 ix86_tls_address_pattern_p (rtx op
)
11447 subrtx_var_iterator::array_type array
;
11448 FOR_EACH_SUBRTX_VAR (iter
, array
, op
, ALL
)
11453 rtx
*x
= &XEXP (op
, 0);
11454 while (GET_CODE (*x
) == PLUS
)
11457 for (i
= 0; i
< 2; i
++)
11459 rtx u
= XEXP (*x
, i
);
11460 if (GET_CODE (u
) == ZERO_EXTEND
)
11462 if (GET_CODE (u
) == UNSPEC
11463 && XINT (u
, 1) == UNSPEC_TP
)
11469 iter
.skip_subrtxes ();
11476 /* Rewrite *LOC so that it refers to a default TLS address space. */
11478 ix86_rewrite_tls_address_1 (rtx
*loc
)
11480 subrtx_ptr_iterator::array_type array
;
11481 FOR_EACH_SUBRTX_PTR (iter
, array
, loc
, ALL
)
11486 rtx addr
= XEXP (*loc
, 0);
11488 while (GET_CODE (*x
) == PLUS
)
11491 for (i
= 0; i
< 2; i
++)
11493 rtx u
= XEXP (*x
, i
);
11494 if (GET_CODE (u
) == ZERO_EXTEND
)
11496 if (GET_CODE (u
) == UNSPEC
11497 && XINT (u
, 1) == UNSPEC_TP
)
11499 addr_space_t as
= DEFAULT_TLS_SEG_REG
;
11501 *x
= XEXP (*x
, 1 - i
);
11503 *loc
= replace_equiv_address_nv (*loc
, addr
, true);
11504 set_mem_addr_space (*loc
, as
);
11511 iter
.skip_subrtxes ();
11516 /* Rewrite instruction pattern involvning TLS address
11517 so that it refers to a default TLS address space. */
11519 ix86_rewrite_tls_address (rtx pattern
)
11521 pattern
= copy_insn (pattern
);
11522 ix86_rewrite_tls_address_1 (&pattern
);
11526 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11527 to symbol DECL if BEIMPORT is true. Otherwise create or return the
11528 unique refptr-DECL symbol corresponding to symbol DECL. */
11530 struct dllimport_hasher
: ggc_cache_ptr_hash
<tree_map
>
11532 static inline hashval_t
hash (tree_map
*m
) { return m
->hash
; }
11534 equal (tree_map
*a
, tree_map
*b
)
11536 return a
->base
.from
== b
->base
.from
;
11540 keep_cache_entry (tree_map
*&m
)
11542 return ggc_marked_p (m
->base
.from
);
11546 static GTY((cache
)) hash_table
<dllimport_hasher
> *dllimport_map
;
11549 get_dllimport_decl (tree decl
, bool beimport
)
11551 struct tree_map
*h
, in
;
11553 const char *prefix
;
11554 size_t namelen
, prefixlen
;
11559 if (!dllimport_map
)
11560 dllimport_map
= hash_table
<dllimport_hasher
>::create_ggc (512);
11562 in
.hash
= htab_hash_pointer (decl
);
11563 in
.base
.from
= decl
;
11564 tree_map
**loc
= dllimport_map
->find_slot_with_hash (&in
, in
.hash
, INSERT
);
11569 *loc
= h
= ggc_alloc
<tree_map
> ();
11571 h
->base
.from
= decl
;
11572 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
11573 VAR_DECL
, NULL
, ptr_type_node
);
11574 DECL_ARTIFICIAL (to
) = 1;
11575 DECL_IGNORED_P (to
) = 1;
11576 DECL_EXTERNAL (to
) = 1;
11577 TREE_READONLY (to
) = 1;
11579 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
11580 name
= targetm
.strip_name_encoding (name
);
11582 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
11583 ? "*__imp_" : "*__imp__";
11585 prefix
= user_label_prefix
[0] == 0 ? "*.refptr." : "*refptr.";
11586 namelen
= strlen (name
);
11587 prefixlen
= strlen (prefix
);
11588 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
11589 memcpy (imp_name
, prefix
, prefixlen
);
11590 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
11592 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
11593 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
11594 SET_SYMBOL_REF_DECL (rtl
, to
);
11595 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
| SYMBOL_FLAG_STUBVAR
;
11598 SYMBOL_REF_FLAGS (rtl
) |= SYMBOL_FLAG_EXTERNAL
;
11599 #ifdef SUB_TARGET_RECORD_STUB
11600 SUB_TARGET_RECORD_STUB (name
);
11604 rtl
= gen_const_mem (Pmode
, rtl
);
11605 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
11607 SET_DECL_RTL (to
, rtl
);
11608 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
11613 /* Expand SYMBOL into its corresponding far-address symbol.
11614 WANT_REG is true if we require the result be a register. */
11617 legitimize_pe_coff_extern_decl (rtx symbol
, bool want_reg
)
11622 gcc_assert (SYMBOL_REF_DECL (symbol
));
11623 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), false);
11625 x
= DECL_RTL (imp_decl
);
11627 x
= force_reg (Pmode
, x
);
11631 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11632 true if we require the result be a register. */
11635 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
11640 gcc_assert (SYMBOL_REF_DECL (symbol
));
11641 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), true);
11643 x
= DECL_RTL (imp_decl
);
11645 x
= force_reg (Pmode
, x
);
11649 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
11650 is true if we require the result be a register. */
11653 legitimize_pe_coff_symbol (rtx addr
, bool inreg
)
11655 if (!TARGET_PECOFF
)
11658 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
11660 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
11661 return legitimize_dllimport_symbol (addr
, inreg
);
11662 if (GET_CODE (addr
) == CONST
11663 && GET_CODE (XEXP (addr
, 0)) == PLUS
11664 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
11665 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
11667 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), inreg
);
11668 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
11672 if (ix86_cmodel
!= CM_LARGE_PIC
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
11674 if (GET_CODE (addr
) == SYMBOL_REF
11675 && !is_imported_p (addr
)
11676 && SYMBOL_REF_EXTERNAL_P (addr
)
11677 && SYMBOL_REF_DECL (addr
))
11678 return legitimize_pe_coff_extern_decl (addr
, inreg
);
11680 if (GET_CODE (addr
) == CONST
11681 && GET_CODE (XEXP (addr
, 0)) == PLUS
11682 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
11683 && !is_imported_p (XEXP (XEXP (addr
, 0), 0))
11684 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr
, 0), 0))
11685 && SYMBOL_REF_DECL (XEXP (XEXP (addr
, 0), 0)))
11687 rtx t
= legitimize_pe_coff_extern_decl (XEXP (XEXP (addr
, 0), 0), inreg
);
11688 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
11693 /* Try machine-dependent ways of modifying an illegitimate address
11694 to be legitimate. If we find one, return the new, valid address.
11695 This macro is used in only one place: `memory_address' in explow.c.
11697 OLDX is the address as it was before break_out_memory_refs was called.
11698 In some cases it is useful to look at this to decide what needs to be done.
11700 It is always safe for this macro to do nothing. It exists to recognize
11701 opportunities to optimize the output.
11703 For the 80386, we handle X+REG by loading X into a register R and
11704 using R+REG. R will go in a general reg and indexing will be used.
11705 However, if REG is a broken-out memory address or multiplication,
11706 nothing needs to be done because REG can certainly go in a general reg.
11708 When -fpic is used, special handling is needed for symbolic references.
11709 See comments by legitimize_pic_address in i386.c for details. */
11712 ix86_legitimize_address (rtx x
, rtx
, machine_mode mode
)
11714 bool changed
= false;
11717 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
11719 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
11720 if (GET_CODE (x
) == CONST
11721 && GET_CODE (XEXP (x
, 0)) == PLUS
11722 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
11723 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
11725 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
11726 (enum tls_model
) log
, false);
11727 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
11730 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
11732 rtx tmp
= legitimize_pe_coff_symbol (x
, true);
11737 if (flag_pic
&& SYMBOLIC_CONST (x
))
11738 return legitimize_pic_address (x
, 0);
11741 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
11742 return machopic_indirect_data_reference (x
, 0);
11745 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
11746 if (GET_CODE (x
) == ASHIFT
11747 && CONST_INT_P (XEXP (x
, 1))
11748 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
11751 log
= INTVAL (XEXP (x
, 1));
11752 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
11753 GEN_INT (1 << log
));
11756 if (GET_CODE (x
) == PLUS
)
11758 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11760 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
11761 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
11762 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
11765 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
11766 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
11767 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
11768 GEN_INT (1 << log
));
11771 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
11772 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
11773 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
11776 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
11777 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
11778 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
11779 GEN_INT (1 << log
));
11782 /* Put multiply first if it isn't already. */
11783 if (GET_CODE (XEXP (x
, 1)) == MULT
)
11785 std::swap (XEXP (x
, 0), XEXP (x
, 1));
11789 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11790 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11791 created by virtual register instantiation, register elimination, and
11792 similar optimizations. */
11793 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
11796 x
= gen_rtx_PLUS (Pmode
,
11797 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
11798 XEXP (XEXP (x
, 1), 0)),
11799 XEXP (XEXP (x
, 1), 1));
11803 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11804 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
11805 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
11806 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
11807 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
11808 && CONSTANT_P (XEXP (x
, 1)))
11811 rtx other
= NULL_RTX
;
11813 if (CONST_INT_P (XEXP (x
, 1)))
11815 constant
= XEXP (x
, 1);
11816 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
11818 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
11820 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
11821 other
= XEXP (x
, 1);
11829 x
= gen_rtx_PLUS (Pmode
,
11830 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
11831 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
11832 plus_constant (Pmode
, other
,
11833 INTVAL (constant
)));
11837 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
11840 if (GET_CODE (XEXP (x
, 0)) == MULT
)
11843 XEXP (x
, 0) = copy_addr_to_reg (XEXP (x
, 0));
11846 if (GET_CODE (XEXP (x
, 1)) == MULT
)
11849 XEXP (x
, 1) = copy_addr_to_reg (XEXP (x
, 1));
11853 && REG_P (XEXP (x
, 1))
11854 && REG_P (XEXP (x
, 0)))
11857 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
11860 x
= legitimize_pic_address (x
, 0);
11863 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
11866 if (REG_P (XEXP (x
, 0)))
11868 rtx temp
= gen_reg_rtx (Pmode
);
11869 rtx val
= force_operand (XEXP (x
, 1), temp
);
11872 val
= convert_to_mode (Pmode
, val
, 1);
11873 emit_move_insn (temp
, val
);
11876 XEXP (x
, 1) = temp
;
11880 else if (REG_P (XEXP (x
, 1)))
11882 rtx temp
= gen_reg_rtx (Pmode
);
11883 rtx val
= force_operand (XEXP (x
, 0), temp
);
11886 val
= convert_to_mode (Pmode
, val
, 1);
11887 emit_move_insn (temp
, val
);
11890 XEXP (x
, 0) = temp
;
11898 /* Print an integer constant expression in assembler syntax. Addition
11899 and subtraction are the only arithmetic that may appear in these
11900 expressions. FILE is the stdio stream to write to, X is the rtx, and
11901 CODE is the operand print code from the output string. */
11904 output_pic_addr_const (FILE *file
, rtx x
, int code
)
11908 switch (GET_CODE (x
))
11911 gcc_assert (flag_pic
);
11916 if (TARGET_64BIT
|| ! TARGET_MACHO_SYMBOL_STUBS
)
11917 output_addr_const (file
, x
);
11920 const char *name
= XSTR (x
, 0);
11922 /* Mark the decl as referenced so that cgraph will
11923 output the function. */
11924 if (SYMBOL_REF_DECL (x
))
11925 mark_decl_referenced (SYMBOL_REF_DECL (x
));
11928 if (MACHOPIC_INDIRECT
11929 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
11930 name
= machopic_indirection_name (x
, /*stub_p=*/true);
11932 assemble_name (file
, name
);
11934 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& TARGET_PECOFF
)
11935 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
11936 fputs ("@PLT", file
);
11943 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
11944 assemble_name (asm_out_file
, buf
);
11948 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
11952 /* This used to output parentheses around the expression,
11953 but that does not work on the 386 (either ATT or BSD assembler). */
11954 output_pic_addr_const (file
, XEXP (x
, 0), code
);
11958 /* We can't handle floating point constants;
11959 TARGET_PRINT_OPERAND must handle them. */
11960 output_operand_lossage ("floating constant misused");
11964 /* Some assemblers need integer constants to appear first. */
11965 if (CONST_INT_P (XEXP (x
, 0)))
11967 output_pic_addr_const (file
, XEXP (x
, 0), code
);
11969 output_pic_addr_const (file
, XEXP (x
, 1), code
);
11973 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
11974 output_pic_addr_const (file
, XEXP (x
, 1), code
);
11976 output_pic_addr_const (file
, XEXP (x
, 0), code
);
11982 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
11983 output_pic_addr_const (file
, XEXP (x
, 0), code
);
11985 output_pic_addr_const (file
, XEXP (x
, 1), code
);
11987 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
11991 gcc_assert (XVECLEN (x
, 0) == 1);
11992 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
11993 switch (XINT (x
, 1))
11996 fputs ("@GOT", file
);
11998 case UNSPEC_GOTOFF
:
11999 fputs ("@GOTOFF", file
);
12001 case UNSPEC_PLTOFF
:
12002 fputs ("@PLTOFF", file
);
12005 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
12006 "(%rip)" : "[rip]", file
);
12008 case UNSPEC_GOTPCREL
:
12009 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
12010 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
12012 case UNSPEC_GOTTPOFF
:
12013 /* FIXME: This might be @TPOFF in Sun ld too. */
12014 fputs ("@gottpoff", file
);
12017 fputs ("@tpoff", file
);
12019 case UNSPEC_NTPOFF
:
12021 fputs ("@tpoff", file
);
12023 fputs ("@ntpoff", file
);
12025 case UNSPEC_DTPOFF
:
12026 fputs ("@dtpoff", file
);
12028 case UNSPEC_GOTNTPOFF
:
12030 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
12031 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
12033 fputs ("@gotntpoff", file
);
12035 case UNSPEC_INDNTPOFF
:
12036 fputs ("@indntpoff", file
);
12039 case UNSPEC_MACHOPIC_OFFSET
:
12041 machopic_output_function_base_name (file
);
12045 output_operand_lossage ("invalid UNSPEC as operand");
12051 output_operand_lossage ("invalid expression as operand");
12055 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12056 We need to emit DTP-relative relocations. */
12058 static void ATTRIBUTE_UNUSED
12059 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
12061 fputs (ASM_LONG
, file
);
12062 output_addr_const (file
, x
);
12063 fputs ("@dtpoff", file
);
12069 fputs (", 0", file
);
12072 gcc_unreachable ();
12076 /* Return true if X is a representation of the PIC register. This copes
12077 with calls from ix86_find_base_term, where the register might have
12078 been replaced by a cselib value. */
12081 ix86_pic_register_p (rtx x
)
12083 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
12084 return (pic_offset_table_rtx
12085 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
12086 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SET_GOT
)
12088 else if (!REG_P (x
))
12090 else if (pic_offset_table_rtx
)
12092 if (REGNO (x
) == REGNO (pic_offset_table_rtx
))
12094 if (HARD_REGISTER_P (x
)
12095 && !HARD_REGISTER_P (pic_offset_table_rtx
)
12096 && ORIGINAL_REGNO (x
) == REGNO (pic_offset_table_rtx
))
12101 return REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
12104 /* Helper function for ix86_delegitimize_address.
12105 Attempt to delegitimize TLS local-exec accesses. */
12108 ix86_delegitimize_tls_address (rtx orig_x
)
12110 rtx x
= orig_x
, unspec
;
12111 struct ix86_address addr
;
12113 if (!TARGET_TLS_DIRECT_SEG_REFS
)
12117 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
12119 if (ix86_decompose_address (x
, &addr
) == 0
12120 || addr
.seg
!= DEFAULT_TLS_SEG_REG
12121 || addr
.disp
== NULL_RTX
12122 || GET_CODE (addr
.disp
) != CONST
)
12124 unspec
= XEXP (addr
.disp
, 0);
12125 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
12126 unspec
= XEXP (unspec
, 0);
12127 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
12129 x
= XVECEXP (unspec
, 0, 0);
12130 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
12131 if (unspec
!= XEXP (addr
.disp
, 0))
12132 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
12135 rtx idx
= addr
.index
;
12136 if (addr
.scale
!= 1)
12137 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
12138 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
12141 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
12142 if (MEM_P (orig_x
))
12143 x
= replace_equiv_address_nv (orig_x
, x
);
12147 /* In the name of slightly smaller debug output, and to cater to
12148 general assembler lossage, recognize PIC+GOTOFF and turn it back
12149 into a direct symbol reference.
12151 On Darwin, this is necessary to avoid a crash, because Darwin
12152 has a different PIC label for each routine but the DWARF debugging
12153 information is not associated with any particular routine, so it's
12154 necessary to remove references to the PIC label from RTL stored by
12155 the DWARF output code.
12157 This helper is used in the normal ix86_delegitimize_address
12158 entrypoint (e.g. used in the target delegitimization hook) and
12159 in ix86_find_base_term. As compile time memory optimization, we
12160 avoid allocating rtxes that will not change anything on the outcome
12161 of the callers (find_base_value and find_base_term). */
12164 ix86_delegitimize_address_1 (rtx x
, bool base_term_p
)
12166 rtx orig_x
= delegitimize_mem_from_attrs (x
);
12167 /* addend is NULL or some rtx if x is something+GOTOFF where
12168 something doesn't include the PIC register. */
12169 rtx addend
= NULL_RTX
;
12170 /* reg_addend is NULL or a multiple of some register. */
12171 rtx reg_addend
= NULL_RTX
;
12172 /* const_addend is NULL or a const_int. */
12173 rtx const_addend
= NULL_RTX
;
12174 /* This is the result, or NULL. */
12175 rtx result
= NULL_RTX
;
12184 if (GET_CODE (x
) == CONST
12185 && GET_CODE (XEXP (x
, 0)) == PLUS
12186 && GET_MODE (XEXP (x
, 0)) == Pmode
12187 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
12188 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
12189 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
12191 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
12192 base. A CONST can't be arg_pointer_rtx based. */
12193 if (base_term_p
&& MEM_P (orig_x
))
12195 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
12196 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
12197 if (MEM_P (orig_x
))
12198 x
= replace_equiv_address_nv (orig_x
, x
);
12202 if (GET_CODE (x
) == CONST
12203 && GET_CODE (XEXP (x
, 0)) == UNSPEC
12204 && (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTPCREL
12205 || XINT (XEXP (x
, 0), 1) == UNSPEC_PCREL
)
12206 && (MEM_P (orig_x
) || XINT (XEXP (x
, 0), 1) == UNSPEC_PCREL
))
12208 x
= XVECEXP (XEXP (x
, 0), 0, 0);
12209 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
12211 x
= lowpart_subreg (GET_MODE (orig_x
), x
, GET_MODE (x
));
12218 if (ix86_cmodel
!= CM_MEDIUM_PIC
&& ix86_cmodel
!= CM_LARGE_PIC
)
12219 return ix86_delegitimize_tls_address (orig_x
);
12221 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
12222 and -mcmodel=medium -fpic. */
12225 if (GET_CODE (x
) != PLUS
12226 || GET_CODE (XEXP (x
, 1)) != CONST
)
12227 return ix86_delegitimize_tls_address (orig_x
);
12229 if (ix86_pic_register_p (XEXP (x
, 0)))
12230 /* %ebx + GOT/GOTOFF */
12232 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
12234 /* %ebx + %reg * scale + GOT/GOTOFF */
12235 reg_addend
= XEXP (x
, 0);
12236 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
12237 reg_addend
= XEXP (reg_addend
, 1);
12238 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
12239 reg_addend
= XEXP (reg_addend
, 0);
12242 reg_addend
= NULL_RTX
;
12243 addend
= XEXP (x
, 0);
12247 addend
= XEXP (x
, 0);
12249 x
= XEXP (XEXP (x
, 1), 0);
12250 if (GET_CODE (x
) == PLUS
12251 && CONST_INT_P (XEXP (x
, 1)))
12253 const_addend
= XEXP (x
, 1);
12257 if (GET_CODE (x
) == UNSPEC
12258 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
12259 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))
12260 || (XINT (x
, 1) == UNSPEC_PLTOFF
&& ix86_cmodel
== CM_LARGE_PIC
12261 && !MEM_P (orig_x
) && !addend
)))
12262 result
= XVECEXP (x
, 0, 0);
12264 if (!TARGET_64BIT
&& TARGET_MACHO
&& darwin_local_data_pic (x
)
12265 && !MEM_P (orig_x
))
12266 result
= XVECEXP (x
, 0, 0);
12269 return ix86_delegitimize_tls_address (orig_x
);
12271 /* For (PLUS something CONST_INT) both find_base_{value,term} just
12272 recurse on the first operand. */
12273 if (const_addend
&& !base_term_p
)
12274 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
12276 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
12279 /* If the rest of original X doesn't involve the PIC register, add
12280 addend and subtract pic_offset_table_rtx. This can happen e.g.
12282 leal (%ebx, %ecx, 4), %ecx
12284 movl foo@GOTOFF(%ecx), %edx
12285 in which case we return (%ecx - %ebx) + foo
12286 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
12287 and reload has completed. Don't do the latter for debug,
12288 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
12289 if (pic_offset_table_rtx
12290 && (!reload_completed
|| !ix86_use_pseudo_pic_reg ()))
12291 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
12292 pic_offset_table_rtx
),
12294 else if (base_term_p
12295 && pic_offset_table_rtx
12297 && !TARGET_VXWORKS_RTP
)
12299 rtx tmp
= gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
12300 tmp
= gen_rtx_MINUS (Pmode
, copy_rtx (addend
), tmp
);
12301 result
= gen_rtx_PLUS (Pmode
, tmp
, result
);
12306 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
12308 result
= lowpart_subreg (GET_MODE (orig_x
), result
, Pmode
);
12309 if (result
== NULL_RTX
)
12315 /* The normal instantiation of the above template. */
12318 ix86_delegitimize_address (rtx x
)
12320 return ix86_delegitimize_address_1 (x
, false);
12323 /* If X is a machine specific address (i.e. a symbol or label being
12324 referenced as a displacement from the GOT implemented using an
12325 UNSPEC), then return the base term. Otherwise return X. */
12328 ix86_find_base_term (rtx x
)
12334 if (GET_CODE (x
) != CONST
)
12336 term
= XEXP (x
, 0);
12337 if (GET_CODE (term
) == PLUS
12338 && CONST_INT_P (XEXP (term
, 1)))
12339 term
= XEXP (term
, 0);
12340 if (GET_CODE (term
) != UNSPEC
12341 || (XINT (term
, 1) != UNSPEC_GOTPCREL
12342 && XINT (term
, 1) != UNSPEC_PCREL
))
12345 return XVECEXP (term
, 0, 0);
12348 return ix86_delegitimize_address_1 (x
, true);
12351 /* Return true if X shouldn't be emitted into the debug info.
12352 Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
12353 symbol easily into the .debug_info section, so we need not to
12354 delegitimize, but instead assemble as @gotoff.
12355 Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
12356 assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */
12359 ix86_const_not_ok_for_debug_p (rtx x
)
12361 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) != UNSPEC_GOTOFF
)
12364 if (SYMBOL_REF_P (x
) && strcmp (XSTR (x
, 0), GOT_SYMBOL_NAME
) == 0)
12371 put_condition_code (enum rtx_code code
, machine_mode mode
, bool reverse
,
12372 bool fp
, FILE *file
)
12374 const char *suffix
;
12376 if (mode
== CCFPmode
)
12378 code
= ix86_fp_compare_code_to_integer (code
);
12382 code
= reverse_condition (code
);
12387 gcc_assert (mode
!= CCGZmode
);
12411 gcc_assert (mode
!= CCGZmode
);
12435 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
12439 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12440 Those same assemblers have the same but opposite lossage on cmov. */
12441 if (mode
== CCmode
)
12442 suffix
= fp
? "nbe" : "a";
12444 gcc_unreachable ();
12461 gcc_unreachable ();
12465 if (mode
== CCmode
|| mode
== CCGZmode
)
12467 else if (mode
== CCCmode
)
12468 suffix
= fp
? "b" : "c";
12470 gcc_unreachable ();
12487 gcc_unreachable ();
12491 if (mode
== CCmode
|| mode
== CCGZmode
)
12493 else if (mode
== CCCmode
)
12494 suffix
= fp
? "nb" : "nc";
12496 gcc_unreachable ();
12499 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
12503 if (mode
== CCmode
)
12506 gcc_unreachable ();
12509 suffix
= fp
? "u" : "p";
12512 suffix
= fp
? "nu" : "np";
12515 gcc_unreachable ();
12517 fputs (suffix
, file
);
12520 /* Print the name of register X to FILE based on its machine mode and number.
12521 If CODE is 'w', pretend the mode is HImode.
12522 If CODE is 'b', pretend the mode is QImode.
12523 If CODE is 'k', pretend the mode is SImode.
12524 If CODE is 'q', pretend the mode is DImode.
12525 If CODE is 'x', pretend the mode is V4SFmode.
12526 If CODE is 't', pretend the mode is V8SFmode.
12527 If CODE is 'g', pretend the mode is V16SFmode.
12528 If CODE is 'h', pretend the reg is the 'high' byte register.
12529 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12530 If CODE is 'd', duplicate the operand for AVX instruction.
12531 If CODE is 'V', print naked full integer register name without %.
12535 print_reg (rtx x
, int code
, FILE *file
)
12539 unsigned int regno
;
12542 if (ASSEMBLER_DIALECT
== ASM_ATT
&& code
!= 'V')
12547 gcc_assert (TARGET_64BIT
);
12548 fputs ("rip", file
);
12552 if (code
== 'y' && STACK_TOP_P (x
))
12554 fputs ("st(0)", file
);
12560 else if (code
== 'b')
12562 else if (code
== 'k')
12564 else if (code
== 'q')
12566 else if (code
== 'h')
12568 else if (code
== 'x')
12570 else if (code
== 't')
12572 else if (code
== 'g')
12575 msize
= GET_MODE_SIZE (GET_MODE (x
));
12579 if (regno
== ARG_POINTER_REGNUM
12580 || regno
== FRAME_POINTER_REGNUM
12581 || regno
== FPSR_REG
)
12583 output_operand_lossage
12584 ("invalid use of register '%s'", reg_names
[regno
]);
12587 else if (regno
== FLAGS_REG
)
12589 output_operand_lossage ("invalid use of asm flag output");
12595 if (GENERAL_REGNO_P (regno
))
12596 msize
= GET_MODE_SIZE (word_mode
);
12598 error ("%<V%> modifier on non-integer register");
12601 duplicated
= code
== 'd' && TARGET_AVX
;
12608 if (GENERAL_REGNO_P (regno
) && msize
> GET_MODE_SIZE (word_mode
))
12609 warning (0, "unsupported size for integer register");
12612 if (LEGACY_INT_REGNO_P (regno
))
12613 putc (msize
> 4 && TARGET_64BIT
? 'r' : 'e', file
);
12617 reg
= hi_reg_name
[regno
];
12620 if (regno
>= ARRAY_SIZE (qi_reg_name
))
12622 if (!ANY_QI_REGNO_P (regno
))
12623 error ("unsupported size for integer register");
12624 reg
= qi_reg_name
[regno
];
12627 if (regno
>= ARRAY_SIZE (qi_high_reg_name
))
12629 reg
= qi_high_reg_name
[regno
];
12633 if (SSE_REGNO_P (regno
))
12635 gcc_assert (!duplicated
);
12636 putc (msize
== 32 ? 'y' : 'z', file
);
12637 reg
= hi_reg_name
[regno
] + 1;
12642 gcc_unreachable ();
12647 /* Irritatingly, AMD extended registers use
12648 different naming convention: "r%d[bwd]" */
12649 if (REX_INT_REGNO_P (regno
))
12651 gcc_assert (TARGET_64BIT
);
12655 error ("extended registers have no high halves");
12670 error ("unsupported operand size for extended register");
12678 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12679 fprintf (file
, ", %%%s", reg
);
12681 fprintf (file
, ", %s", reg
);
12685 /* Meaning of CODE:
12686 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12687 C -- print opcode suffix for set/cmov insn.
12688 c -- like C, but print reversed condition
12689 F,f -- likewise, but for floating-point.
12690 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12692 R -- print embedded rounding and sae.
12693 r -- print only sae.
12694 z -- print the opcode suffix for the size of the current operand.
12695 Z -- likewise, with special suffixes for x87 instructions.
12696 * -- print a star (in certain assembler syntax)
12697 A -- print an absolute memory reference.
12698 E -- print address with DImode register names if TARGET_64BIT.
12699 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12700 s -- print a shift double count, followed by the assemblers argument
12702 b -- print the QImode name of the register for the indicated operand.
12703 %b0 would print %al if operands[0] is reg 0.
12704 w -- likewise, print the HImode name of the register.
12705 k -- likewise, print the SImode name of the register.
12706 q -- likewise, print the DImode name of the register.
12707 x -- likewise, print the V4SFmode name of the register.
12708 t -- likewise, print the V8SFmode name of the register.
12709 g -- likewise, print the V16SFmode name of the register.
12710 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12711 y -- print "st(0)" instead of "st" as a register.
12712 d -- print duplicated register operand for AVX instruction.
12713 D -- print condition for SSE cmp instruction.
12714 P -- if PIC, print an @PLT suffix.
12715 p -- print raw symbol name.
12716 X -- don't print any sort of PIC '@' suffix for a symbol.
12717 & -- print some in-use local-dynamic symbol name.
12718 H -- print a memory address offset by 8; used for sse high-parts
12719 Y -- print condition for XOP pcom* instruction.
12720 V -- print naked full integer register name without %.
12721 + -- print a branch hint as 'cs' or 'ds' prefix
12722 ; -- print a semicolon (after prefixes due to bug in older gas).
12723 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
12724 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
12725 M -- print addr32 prefix for TARGET_X32 with VSIB address.
12726 ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
12727 N -- print maskz if it's constant 0 operand.
12731 ix86_print_operand (FILE *file
, rtx x
, int code
)
12738 switch (ASSEMBLER_DIALECT
)
12745 /* Intel syntax. For absolute addresses, registers should not
12746 be surrounded by braces. */
12750 ix86_print_operand (file
, x
, 0);
12757 gcc_unreachable ();
12760 ix86_print_operand (file
, x
, 0);
12764 /* Wrap address in an UNSPEC to declare special handling. */
12766 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
12768 output_address (VOIDmode
, x
);
12772 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12777 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12782 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12787 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12792 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12797 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12802 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12803 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
12806 switch (GET_MODE_SIZE (GET_MODE (x
)))
12821 output_operand_lossage ("invalid operand size for operand "
12831 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
12833 /* Opcodes don't get size suffixes if using Intel opcodes. */
12834 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
12837 switch (GET_MODE_SIZE (GET_MODE (x
)))
12856 output_operand_lossage ("invalid operand size for operand "
12862 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
12863 warning (0, "non-integer operand used with operand code %<z%>");
12867 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12868 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
12871 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
12873 switch (GET_MODE_SIZE (GET_MODE (x
)))
12876 #ifdef HAVE_AS_IX86_FILDS
12886 #ifdef HAVE_AS_IX86_FILDQ
12889 fputs ("ll", file
);
12897 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
12899 /* 387 opcodes don't get size suffixes
12900 if the operands are registers. */
12901 if (STACK_REG_P (x
))
12904 switch (GET_MODE_SIZE (GET_MODE (x
)))
12925 output_operand_lossage ("invalid operand type used with "
12926 "operand code 'Z'");
12930 output_operand_lossage ("invalid operand size for operand code 'Z'");
12950 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
12952 ix86_print_operand (file
, x
, 0);
12953 fputs (", ", file
);
12958 switch (GET_CODE (x
))
12961 fputs ("neq", file
);
12964 fputs ("eq", file
);
12968 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
12972 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
12976 fputs ("le", file
);
12980 fputs ("lt", file
);
12983 fputs ("unord", file
);
12986 fputs ("ord", file
);
12989 fputs ("ueq", file
);
12992 fputs ("nlt", file
);
12995 fputs ("nle", file
);
12998 fputs ("ule", file
);
13001 fputs ("ult", file
);
13004 fputs ("une", file
);
13007 output_operand_lossage ("operand is not a condition code, "
13008 "invalid operand code 'Y'");
13014 /* Little bit of braindamage here. The SSE compare instructions
13015 does use completely different names for the comparisons that the
13016 fp conditional moves. */
13017 switch (GET_CODE (x
))
13022 fputs ("eq_us", file
);
13027 fputs ("eq", file
);
13032 fputs ("nge", file
);
13037 fputs ("lt", file
);
13042 fputs ("ngt", file
);
13047 fputs ("le", file
);
13050 fputs ("unord", file
);
13055 fputs ("neq_oq", file
);
13060 fputs ("neq", file
);
13065 fputs ("ge", file
);
13070 fputs ("nlt", file
);
13075 fputs ("gt", file
);
13080 fputs ("nle", file
);
13083 fputs ("ord", file
);
13086 output_operand_lossage ("operand is not a condition code, "
13087 "invalid operand code 'D'");
13094 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13095 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13097 gcc_fallthrough ();
13102 if (!COMPARISON_P (x
))
13104 output_operand_lossage ("operand is not a condition code, "
13105 "invalid operand code '%c'", code
);
13108 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
13109 code
== 'c' || code
== 'f',
13110 code
== 'F' || code
== 'f',
13115 if (!offsettable_memref_p (x
))
13117 output_operand_lossage ("operand is not an offsettable memory "
13118 "reference, invalid operand code 'H'");
13121 /* It doesn't actually matter what mode we use here, as we're
13122 only going to use this for printing. */
13123 x
= adjust_address_nv (x
, DImode
, 8);
13124 /* Output 'qword ptr' for intel assembler dialect. */
13125 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
13130 if (!CONST_INT_P (x
))
13132 output_operand_lossage ("operand is not an integer, invalid "
13133 "operand code 'K'");
13137 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
13138 #ifdef HAVE_AS_IX86_HLE
13139 fputs ("xacquire ", file
);
13141 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
13143 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
13144 #ifdef HAVE_AS_IX86_HLE
13145 fputs ("xrelease ", file
);
13147 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
13149 /* We do not want to print value of the operand. */
13153 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
13154 fputs ("{z}", file
);
13158 if (!CONST_INT_P (x
) || INTVAL (x
) != ROUND_SAE
)
13160 output_operand_lossage ("operand is not a specific integer, "
13161 "invalid operand code 'r'");
13165 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
13166 fputs (", ", file
);
13168 fputs ("{sae}", file
);
13170 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13171 fputs (", ", file
);
13176 if (!CONST_INT_P (x
))
13178 output_operand_lossage ("operand is not an integer, invalid "
13179 "operand code 'R'");
13183 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
13184 fputs (", ", file
);
13186 switch (INTVAL (x
))
13188 case ROUND_NEAREST_INT
| ROUND_SAE
:
13189 fputs ("{rn-sae}", file
);
13191 case ROUND_NEG_INF
| ROUND_SAE
:
13192 fputs ("{rd-sae}", file
);
13194 case ROUND_POS_INF
| ROUND_SAE
:
13195 fputs ("{ru-sae}", file
);
13197 case ROUND_ZERO
| ROUND_SAE
:
13198 fputs ("{rz-sae}", file
);
13201 output_operand_lossage ("operand is not a specific integer, "
13202 "invalid operand code 'R'");
13205 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13206 fputs (", ", file
);
13211 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13217 const char *name
= get_some_local_dynamic_name ();
13219 output_operand_lossage ("'%%&' used without any "
13220 "local dynamic TLS references");
13222 assemble_name (file
, name
);
13231 || optimize_function_for_size_p (cfun
)
13232 || !TARGET_BRANCH_PREDICTION_HINTS
)
13235 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
13238 int pred_val
= profile_probability::from_reg_br_prob_note
13239 (XINT (x
, 0)).to_reg_br_prob_base ();
13241 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
13242 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
13244 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
13246 = final_forward_branch_p (current_output_insn
) == 0;
13248 /* Emit hints only in the case default branch prediction
13249 heuristics would fail. */
13250 if (taken
!= cputaken
)
13252 /* We use 3e (DS) prefix for taken branches and
13253 2e (CS) prefix for not taken branches. */
13255 fputs ("ds ; ", file
);
13257 fputs ("cs ; ", file
);
13265 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13271 putc (TARGET_AVX2
? 'i' : 'f', file
);
13277 /* NB: 32-bit indices in VSIB address are sign-extended
13278 to 64 bits. In x32, if 32-bit address 0xf7fa3010 is
13279 sign-extended to 0xfffffffff7fa3010 which is invalid
13280 address. Add addr32 prefix if there is no base
13281 register nor symbol. */
13283 struct ix86_address parts
;
13284 ok
= ix86_decompose_address (x
, &parts
);
13285 gcc_assert (ok
&& parts
.index
== NULL_RTX
);
13286 if (parts
.base
== NULL_RTX
13287 && (parts
.disp
== NULL_RTX
13288 || !symbolic_operand (parts
.disp
,
13289 GET_MODE (parts
.disp
))))
13290 fputs ("addr32 ", file
);
13295 if (TARGET_64BIT
&& Pmode
!= word_mode
)
13296 fputs ("addr32 ", file
);
13300 if (ix86_notrack_prefixed_insn_p (current_output_insn
))
13301 fputs ("notrack ", file
);
13305 output_operand_lossage ("invalid operand code '%c'", code
);
13310 print_reg (x
, code
, file
);
13312 else if (MEM_P (x
))
13314 rtx addr
= XEXP (x
, 0);
13316 /* No `byte ptr' prefix for call instructions ... */
13317 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
13319 machine_mode mode
= GET_MODE (x
);
13322 /* Check for explicit size override codes. */
13325 else if (code
== 'w')
13327 else if (code
== 'k')
13329 else if (code
== 'q')
13331 else if (code
== 'x')
13333 else if (code
== 't')
13335 else if (code
== 'g')
13337 else if (mode
== BLKmode
)
13338 /* ... or BLKmode operands, when not overridden. */
13341 switch (GET_MODE_SIZE (mode
))
13343 case 1: size
= "BYTE"; break;
13344 case 2: size
= "WORD"; break;
13345 case 4: size
= "DWORD"; break;
13346 case 8: size
= "QWORD"; break;
13347 case 12: size
= "TBYTE"; break;
13349 if (mode
== XFmode
)
13354 case 32: size
= "YMMWORD"; break;
13355 case 64: size
= "ZMMWORD"; break;
13357 gcc_unreachable ();
13361 fputs (size
, file
);
13362 fputs (" PTR ", file
);
13366 if (this_is_asm_operands
&& ! address_operand (addr
, VOIDmode
))
13367 output_operand_lossage ("invalid constraints for operand");
13369 ix86_print_operand_address_as
13370 (file
, addr
, MEM_ADDR_SPACE (x
), code
== 'p' || code
== 'P');
13373 else if (CONST_DOUBLE_P (x
) && GET_MODE (x
) == SFmode
)
13377 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
13379 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13381 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13383 fprintf (file
, "0x%08" HOST_LONG_LONG_FORMAT
"x",
13384 (unsigned long long) (int) l
);
13386 fprintf (file
, "0x%08x", (unsigned int) l
);
13389 else if (CONST_DOUBLE_P (x
) && GET_MODE (x
) == DFmode
)
13393 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
13395 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13397 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
13400 /* These float cases don't actually occur as immediate operands. */
13401 else if (CONST_DOUBLE_P (x
) && GET_MODE (x
) == XFmode
)
13405 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
13406 fputs (dstr
, file
);
13409 /* Print bcst_mem_operand. */
13410 else if (GET_CODE (x
) == VEC_DUPLICATE
)
13412 machine_mode vmode
= GET_MODE (x
);
13413 /* Must be bcst_memory_operand. */
13414 gcc_assert (bcst_mem_operand (x
, vmode
));
13416 rtx mem
= XEXP (x
,0);
13417 ix86_print_operand (file
, mem
, 0);
13423 fputs ("{1to2}", file
);
13429 fputs ("{1to4}", file
);
13435 fputs ("{1to8}", file
);
13439 fputs ("{1to16}", file
);
13442 gcc_unreachable ();
13448 /* We have patterns that allow zero sets of memory, for instance.
13449 In 64-bit mode, we should probably support all 8-byte vectors,
13450 since we can in fact encode that into an immediate. */
13451 if (GET_CODE (x
) == CONST_VECTOR
)
13453 if (x
!= CONST0_RTX (GET_MODE (x
)))
13454 output_operand_lossage ("invalid vector immediate");
13458 if (code
!= 'P' && code
!= 'p')
13460 if (CONST_INT_P (x
))
13462 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13465 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
13466 || GET_CODE (x
) == LABEL_REF
)
13468 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13471 fputs ("OFFSET FLAT:", file
);
13474 if (CONST_INT_P (x
))
13475 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
13476 else if (flag_pic
|| MACHOPIC_INDIRECT
)
13477 output_pic_addr_const (file
, x
, code
);
13479 output_addr_const (file
, x
);
13484 ix86_print_operand_punct_valid_p (unsigned char code
)
13486 return (code
== '*' || code
== '+' || code
== '&' || code
== ';'
13487 || code
== '~' || code
== '^' || code
== '!');
13490 /* Print a memory operand whose address is ADDR. */
13493 ix86_print_operand_address_as (FILE *file
, rtx addr
,
13494 addr_space_t as
, bool no_rip
)
13496 struct ix86_address parts
;
13497 rtx base
, index
, disp
;
13503 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
13505 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
13506 gcc_assert (parts
.index
== NULL_RTX
);
13507 parts
.index
= XVECEXP (addr
, 0, 1);
13508 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
13509 addr
= XVECEXP (addr
, 0, 0);
13512 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
13514 gcc_assert (TARGET_64BIT
);
13515 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
13519 ok
= ix86_decompose_address (addr
, &parts
);
13524 index
= parts
.index
;
13526 scale
= parts
.scale
;
13528 if (ADDR_SPACE_GENERIC_P (as
))
13531 gcc_assert (ADDR_SPACE_GENERIC_P (parts
.seg
));
13533 if (!ADDR_SPACE_GENERIC_P (as
))
13535 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13540 case ADDR_SPACE_SEG_FS
:
13541 fputs ("fs:", file
);
13543 case ADDR_SPACE_SEG_GS
:
13544 fputs ("gs:", file
);
13547 gcc_unreachable ();
13551 /* Use one byte shorter RIP relative addressing for 64bit mode. */
13552 if (TARGET_64BIT
&& !base
&& !index
&& !no_rip
)
13556 if (GET_CODE (disp
) == CONST
13557 && GET_CODE (XEXP (disp
, 0)) == PLUS
13558 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
13559 symbol
= XEXP (XEXP (disp
, 0), 0);
13561 if (GET_CODE (symbol
) == LABEL_REF
13562 || (GET_CODE (symbol
) == SYMBOL_REF
13563 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
13567 if (!base
&& !index
)
13569 /* Displacement only requires special attention. */
13570 if (CONST_INT_P (disp
))
13572 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& ADDR_SPACE_GENERIC_P (as
))
13573 fputs ("ds:", file
);
13574 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
13576 /* Load the external function address via the GOT slot to avoid PLT. */
13577 else if (GET_CODE (disp
) == CONST
13578 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
13579 && (XINT (XEXP (disp
, 0), 1) == UNSPEC_GOTPCREL
13580 || XINT (XEXP (disp
, 0), 1) == UNSPEC_GOT
)
13581 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp
, 0), 0, 0)))
13582 output_pic_addr_const (file
, disp
, 0);
13584 output_pic_addr_const (file
, disp
, 0);
13586 output_addr_const (file
, disp
);
13590 /* Print SImode register names to force addr32 prefix. */
13591 if (SImode_address_operand (addr
, VOIDmode
))
13595 gcc_assert (TARGET_64BIT
);
13596 switch (GET_CODE (addr
))
13599 gcc_assert (GET_MODE (addr
) == SImode
);
13600 gcc_assert (GET_MODE (SUBREG_REG (addr
)) == DImode
);
13604 gcc_assert (GET_MODE (addr
) == DImode
);
13607 gcc_unreachable ();
13610 gcc_assert (!code
);
13616 && CONST_INT_P (disp
)
13617 && INTVAL (disp
) < -16*1024*1024)
13619 /* X32 runs in 64-bit mode, where displacement, DISP, in
13620 address DISP(%r64), is encoded as 32-bit immediate sign-
13621 extended from 32-bit to 64-bit. For -0x40000300(%r64),
13622 address is %r64 + 0xffffffffbffffd00. When %r64 <
13623 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
13624 which is invalid for x32. The correct address is %r64
13625 - 0x40000300 == 0xf7ffdd64. To properly encode
13626 -0x40000300(%r64) for x32, we zero-extend negative
13627 displacement by forcing addr32 prefix which truncates
13628 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
13629 zero-extend all negative displacements, including -1(%rsp).
13630 However, for small negative displacements, sign-extension
13631 won't cause overflow. We only zero-extend negative
13632 displacements if they < -16*1024*1024, which is also used
13633 to check legitimate address displacements for PIC. */
13637 /* Since the upper 32 bits of RSP are always zero for x32,
13638 we can encode %esp as %rsp to avoid 0x67 prefix if
13639 there is no index register. */
13640 if (TARGET_X32
&& Pmode
== SImode
13641 && !index
&& base
&& REG_P (base
) && REGNO (base
) == SP_REG
)
13644 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13649 output_pic_addr_const (file
, disp
, 0);
13650 else if (GET_CODE (disp
) == LABEL_REF
)
13651 output_asm_label (disp
);
13653 output_addr_const (file
, disp
);
13658 print_reg (base
, code
, file
);
13662 print_reg (index
, vsib
? 0 : code
, file
);
13663 if (scale
!= 1 || vsib
)
13664 fprintf (file
, ",%d", scale
);
13670 rtx offset
= NULL_RTX
;
13674 /* Pull out the offset of a symbol; print any symbol itself. */
13675 if (GET_CODE (disp
) == CONST
13676 && GET_CODE (XEXP (disp
, 0)) == PLUS
13677 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
13679 offset
= XEXP (XEXP (disp
, 0), 1);
13680 disp
= gen_rtx_CONST (VOIDmode
,
13681 XEXP (XEXP (disp
, 0), 0));
13685 output_pic_addr_const (file
, disp
, 0);
13686 else if (GET_CODE (disp
) == LABEL_REF
)
13687 output_asm_label (disp
);
13688 else if (CONST_INT_P (disp
))
13691 output_addr_const (file
, disp
);
13697 print_reg (base
, code
, file
);
13700 if (INTVAL (offset
) >= 0)
13702 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
13706 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
13713 print_reg (index
, vsib
? 0 : code
, file
);
13714 if (scale
!= 1 || vsib
)
13715 fprintf (file
, "*%d", scale
);
13723 ix86_print_operand_address (FILE *file
, machine_mode
/*mode*/, rtx addr
)
13725 ix86_print_operand_address_as (file
, addr
, ADDR_SPACE_GENERIC
, false);
13728 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13731 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
13735 if (GET_CODE (x
) != UNSPEC
)
13738 op
= XVECEXP (x
, 0, 0);
13739 switch (XINT (x
, 1))
13741 case UNSPEC_GOTOFF
:
13742 output_addr_const (file
, op
);
13743 fputs ("@gotoff", file
);
13745 case UNSPEC_GOTTPOFF
:
13746 output_addr_const (file
, op
);
13747 /* FIXME: This might be @TPOFF in Sun ld. */
13748 fputs ("@gottpoff", file
);
13751 output_addr_const (file
, op
);
13752 fputs ("@tpoff", file
);
13754 case UNSPEC_NTPOFF
:
13755 output_addr_const (file
, op
);
13757 fputs ("@tpoff", file
);
13759 fputs ("@ntpoff", file
);
13761 case UNSPEC_DTPOFF
:
13762 output_addr_const (file
, op
);
13763 fputs ("@dtpoff", file
);
13765 case UNSPEC_GOTNTPOFF
:
13766 output_addr_const (file
, op
);
13768 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13769 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
13771 fputs ("@gotntpoff", file
);
13773 case UNSPEC_INDNTPOFF
:
13774 output_addr_const (file
, op
);
13775 fputs ("@indntpoff", file
);
13778 case UNSPEC_MACHOPIC_OFFSET
:
13779 output_addr_const (file
, op
);
13781 machopic_output_function_base_name (file
);
13793 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13794 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13795 is the expression of the binary operation. The output may either be
13796 emitted here, or returned to the caller, like all output_* functions.
13798 There is no guarantee that the operands are the same mode, as they
13799 might be within FLOAT or FLOAT_EXTEND expressions. */
13801 #ifndef SYSV386_COMPAT
13802 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13803 wants to fix the assemblers because that causes incompatibility
13804 with gcc. No-one wants to fix gcc because that causes
13805 incompatibility with assemblers... You can use the option of
13806 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13807 #define SYSV386_COMPAT 1
13811 output_387_binary_op (rtx_insn
*insn
, rtx
*operands
)
13813 static char buf
[40];
13816 = (SSE_REG_P (operands
[0])
13817 || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]));
13821 else if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
13822 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
13829 switch (GET_CODE (operands
[3]))
13840 gcc_unreachable ();
13847 p
= (GET_MODE (operands
[0]) == SFmode
) ? "ss" : "sd";
13851 p
= "\t{%2, %1, %0|%0, %1, %2}";
13853 p
= "\t{%2, %0|%0, %2}";
13859 /* Even if we do not want to check the inputs, this documents input
13860 constraints. Which helps in understanding the following code. */
13863 if (STACK_REG_P (operands
[0])
13864 && ((REG_P (operands
[1])
13865 && REGNO (operands
[0]) == REGNO (operands
[1])
13866 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
13867 || (REG_P (operands
[2])
13868 && REGNO (operands
[0]) == REGNO (operands
[2])
13869 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
13870 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
13873 gcc_unreachable ();
13876 switch (GET_CODE (operands
[3]))
13880 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
13881 std::swap (operands
[1], operands
[2]);
13883 /* know operands[0] == operands[1]. */
13885 if (MEM_P (operands
[2]))
13891 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
13893 if (STACK_TOP_P (operands
[0]))
13894 /* How is it that we are storing to a dead operand[2]?
13895 Well, presumably operands[1] is dead too. We can't
13896 store the result to st(0) as st(0) gets popped on this
13897 instruction. Instead store to operands[2] (which I
13898 think has to be st(1)). st(1) will be popped later.
13899 gcc <= 2.8.1 didn't have this check and generated
13900 assembly code that the Unixware assembler rejected. */
13901 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13903 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13907 if (STACK_TOP_P (operands
[0]))
13908 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13910 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13915 if (MEM_P (operands
[1]))
13921 if (MEM_P (operands
[2]))
13927 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
13930 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13931 derived assemblers, confusingly reverse the direction of
13932 the operation for fsub{r} and fdiv{r} when the
13933 destination register is not st(0). The Intel assembler
13934 doesn't have this brain damage. Read !SYSV386_COMPAT to
13935 figure out what the hardware really does. */
13936 if (STACK_TOP_P (operands
[0]))
13937 p
= "{p\t%0, %2|rp\t%2, %0}";
13939 p
= "{rp\t%2, %0|p\t%0, %2}";
13941 if (STACK_TOP_P (operands
[0]))
13942 /* As above for fmul/fadd, we can't store to st(0). */
13943 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13945 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13950 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
13953 if (STACK_TOP_P (operands
[0]))
13954 p
= "{rp\t%0, %1|p\t%1, %0}";
13956 p
= "{p\t%1, %0|rp\t%0, %1}";
13958 if (STACK_TOP_P (operands
[0]))
13959 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13961 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13966 if (STACK_TOP_P (operands
[0]))
13968 if (STACK_TOP_P (operands
[1]))
13969 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13971 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13974 else if (STACK_TOP_P (operands
[1]))
13977 p
= "{\t%1, %0|r\t%0, %1}";
13979 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13985 p
= "{r\t%2, %0|\t%0, %2}";
13987 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13993 gcc_unreachable ();
14000 /* Return needed mode for entity in optimize_mode_switching pass. */
14003 ix86_dirflag_mode_needed (rtx_insn
*insn
)
14007 if (cfun
->machine
->func_type
== TYPE_NORMAL
)
14008 return X86_DIRFLAG_ANY
;
14010 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
14011 return TARGET_CLD
? X86_DIRFLAG_ANY
: X86_DIRFLAG_RESET
;
14014 if (recog_memoized (insn
) < 0)
14015 return X86_DIRFLAG_ANY
;
14017 if (get_attr_type (insn
) == TYPE_STR
)
14019 /* Emit cld instruction if stringops are used in the function. */
14020 if (cfun
->machine
->func_type
== TYPE_NORMAL
)
14021 return TARGET_CLD
? X86_DIRFLAG_RESET
: X86_DIRFLAG_ANY
;
14023 return X86_DIRFLAG_RESET
;
14026 return X86_DIRFLAG_ANY
;
14029 /* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */
14032 ix86_check_avx_upper_register (const_rtx exp
)
14034 return (SSE_REG_P (exp
)
14035 && !EXT_REX_SSE_REG_P (exp
)
14036 && GET_MODE_BITSIZE (GET_MODE (exp
)) > 128);
14039 /* Return needed mode for entity in optimize_mode_switching pass. */
14042 ix86_avx_u128_mode_needed (rtx_insn
*insn
)
14048 /* Needed mode is set to AVX_U128_CLEAN if there are
14049 no 256bit or 512bit modes used in function arguments. */
14050 for (link
= CALL_INSN_FUNCTION_USAGE (insn
);
14052 link
= XEXP (link
, 1))
14054 if (GET_CODE (XEXP (link
, 0)) == USE
)
14056 rtx arg
= XEXP (XEXP (link
, 0), 0);
14058 if (ix86_check_avx_upper_register (arg
))
14059 return AVX_U128_DIRTY
;
14063 /* If the function is known to preserve some SSE registers,
14064 RA and previous passes can legitimately rely on that for
14065 modes wider than 256 bits. It's only safe to issue a
14066 vzeroupper if all SSE registers are clobbered. */
14067 const function_abi
&abi
= insn_callee_abi (insn
);
14068 if (!hard_reg_set_subset_p (reg_class_contents
[SSE_REGS
],
14069 abi
.mode_clobbers (V4DImode
)))
14070 return AVX_U128_ANY
;
14072 return AVX_U128_CLEAN
;
14075 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
14076 Hardware changes state only when a 256bit register is written to,
14077 but we need to prevent the compiler from moving optimal insertion
14078 point above eventual read from 256bit or 512 bit register. */
14079 subrtx_iterator::array_type array
;
14080 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), NONCONST
)
14081 if (ix86_check_avx_upper_register (*iter
))
14082 return AVX_U128_DIRTY
;
14084 return AVX_U128_ANY
;
14087 /* Return mode that i387 must be switched into
14088 prior to the execution of insn. */
14091 ix86_i387_mode_needed (int entity
, rtx_insn
*insn
)
14093 enum attr_i387_cw mode
;
14095 /* The mode UNINITIALIZED is used to store control word after a
14096 function call or ASM pattern. The mode ANY specify that function
14097 has no requirements on the control word and make no changes in the
14098 bits we are interested in. */
14101 || (NONJUMP_INSN_P (insn
)
14102 && (asm_noperands (PATTERN (insn
)) >= 0
14103 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
14104 return I387_CW_UNINITIALIZED
;
14106 if (recog_memoized (insn
) < 0)
14107 return I387_CW_ANY
;
14109 mode
= get_attr_i387_cw (insn
);
14113 case I387_ROUNDEVEN
:
14114 if (mode
== I387_CW_ROUNDEVEN
)
14119 if (mode
== I387_CW_TRUNC
)
14124 if (mode
== I387_CW_FLOOR
)
14129 if (mode
== I387_CW_CEIL
)
14134 gcc_unreachable ();
14137 return I387_CW_ANY
;
14140 /* Return mode that entity must be switched into
14141 prior to the execution of insn. */
14144 ix86_mode_needed (int entity
, rtx_insn
*insn
)
14149 return ix86_dirflag_mode_needed (insn
);
14151 return ix86_avx_u128_mode_needed (insn
);
14152 case I387_ROUNDEVEN
:
14156 return ix86_i387_mode_needed (entity
, insn
);
14158 gcc_unreachable ();
14163 /* Check if a 256bit or 512bit AVX register is referenced in stores. */
14166 ix86_check_avx_upper_stores (rtx dest
, const_rtx
, void *data
)
14168 if (ix86_check_avx_upper_register (dest
))
14170 bool *used
= (bool *) data
;
14175 /* Calculate mode of upper 128bit AVX registers after the insn. */
14178 ix86_avx_u128_mode_after (int mode
, rtx_insn
*insn
)
14180 rtx pat
= PATTERN (insn
);
14182 if (vzeroupper_pattern (pat
, VOIDmode
)
14183 || vzeroall_pattern (pat
, VOIDmode
))
14184 return AVX_U128_CLEAN
;
14186 /* We know that state is clean after CALL insn if there are no
14187 256bit or 512bit registers used in the function return register. */
14190 bool avx_upper_reg_found
= false;
14191 note_stores (insn
, ix86_check_avx_upper_stores
, &avx_upper_reg_found
);
14193 return avx_upper_reg_found
? AVX_U128_DIRTY
: AVX_U128_CLEAN
;
14196 /* Otherwise, return current mode. Remember that if insn
14197 references AVX 256bit or 512bit registers, the mode was already
14198 changed to DIRTY from MODE_NEEDED. */
14202 /* Return the mode that an insn results in. */
14205 ix86_mode_after (int entity
, int mode
, rtx_insn
*insn
)
14212 return ix86_avx_u128_mode_after (mode
, insn
);
14213 case I387_ROUNDEVEN
:
14219 gcc_unreachable ();
14224 ix86_dirflag_mode_entry (void)
14226 /* For TARGET_CLD or in the interrupt handler we can't assume
14227 direction flag state at function entry. */
14229 || cfun
->machine
->func_type
!= TYPE_NORMAL
)
14230 return X86_DIRFLAG_ANY
;
14232 return X86_DIRFLAG_RESET
;
14236 ix86_avx_u128_mode_entry (void)
14240 /* Entry mode is set to AVX_U128_DIRTY if there are
14241 256bit or 512bit modes used in function arguments. */
14242 for (arg
= DECL_ARGUMENTS (current_function_decl
); arg
;
14243 arg
= TREE_CHAIN (arg
))
14245 rtx incoming
= DECL_INCOMING_RTL (arg
);
14247 if (incoming
&& ix86_check_avx_upper_register (incoming
))
14248 return AVX_U128_DIRTY
;
14251 return AVX_U128_CLEAN
;
14254 /* Return a mode that ENTITY is assumed to be
14255 switched to at function entry. */
14258 ix86_mode_entry (int entity
)
14263 return ix86_dirflag_mode_entry ();
14265 return ix86_avx_u128_mode_entry ();
14266 case I387_ROUNDEVEN
:
14270 return I387_CW_ANY
;
14272 gcc_unreachable ();
14277 ix86_avx_u128_mode_exit (void)
14279 rtx reg
= crtl
->return_rtx
;
14281 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
14282 or 512 bit modes used in the function return register. */
14283 if (reg
&& ix86_check_avx_upper_register (reg
))
14284 return AVX_U128_DIRTY
;
14286 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
14287 modes used in function arguments, otherwise return AVX_U128_CLEAN.
14289 return ix86_avx_u128_mode_entry ();
14292 /* Return a mode that ENTITY is assumed to be
14293 switched to at function exit. */
14296 ix86_mode_exit (int entity
)
14301 return X86_DIRFLAG_ANY
;
14303 return ix86_avx_u128_mode_exit ();
14304 case I387_ROUNDEVEN
:
14308 return I387_CW_ANY
;
14310 gcc_unreachable ();
/* Priority hook for the mode-switching pass: modes are tried in
   their natural numeric order.  The entity argument is unused.  */

static int
ix86_mode_priority (int, int n)
{
  return n;
}
14320 /* Output code to initialize control word copies used by trunc?f?i and
14321 rounding patterns. CURRENT_MODE is set to current control word,
14322 while NEW_MODE is set to new control word. */
14325 emit_i387_cw_initialization (int mode
)
14327 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
14330 enum ix86_stack_slot slot
;
14332 rtx reg
= gen_reg_rtx (HImode
);
14334 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
14335 emit_move_insn (reg
, copy_rtx (stored_mode
));
14339 case I387_CW_ROUNDEVEN
:
14340 /* round to nearest */
14341 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
14342 slot
= SLOT_CW_ROUNDEVEN
;
14345 case I387_CW_TRUNC
:
14346 /* round toward zero (truncate) */
14347 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
14348 slot
= SLOT_CW_TRUNC
;
14351 case I387_CW_FLOOR
:
14352 /* round down toward -oo */
14353 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
14354 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
14355 slot
= SLOT_CW_FLOOR
;
14359 /* round up toward +oo */
14360 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
14361 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
14362 slot
= SLOT_CW_CEIL
;
14366 gcc_unreachable ();
14369 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
14371 new_mode
= assign_386_stack_local (HImode
, slot
);
14372 emit_move_insn (new_mode
, reg
);
14375 /* Generate one or more insns to set ENTITY to MODE. */
14378 ix86_emit_mode_set (int entity
, int mode
, int prev_mode ATTRIBUTE_UNUSED
,
14379 HARD_REG_SET regs_live ATTRIBUTE_UNUSED
)
14384 if (mode
== X86_DIRFLAG_RESET
)
14385 emit_insn (gen_cld ());
14388 if (mode
== AVX_U128_CLEAN
)
14389 emit_insn (gen_avx_vzeroupper ());
14391 case I387_ROUNDEVEN
:
14395 if (mode
!= I387_CW_ANY
14396 && mode
!= I387_CW_UNINITIALIZED
)
14397 emit_i387_cw_initialization (mode
);
14400 gcc_unreachable ();
14404 /* Output code for INSN to convert a float to a signed int. OPERANDS
14405 are the insn operands. The output may be [HSD]Imode and the input
14406 operand may be [SDX]Fmode. */
14409 output_fix_trunc (rtx_insn
*insn
, rtx
*operands
, bool fisttp
)
14411 bool stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
);
14412 bool dimode_p
= GET_MODE (operands
[0]) == DImode
;
14413 int round_mode
= get_attr_i387_cw (insn
);
14415 static char buf
[40];
14418 /* Jump through a hoop or two for DImode, since the hardware has no
14419 non-popping instruction. We used to do this a different way, but
14420 that was somewhat fragile and broke with post-reload splitters. */
14421 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
14422 output_asm_insn ("fld\t%y1", operands
);
14424 gcc_assert (STACK_TOP_P (operands
[1]));
14425 gcc_assert (MEM_P (operands
[0]));
14426 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
14429 return "fisttp%Z0\t%0";
14431 strcpy (buf
, "fist");
14433 if (round_mode
!= I387_CW_ANY
)
14434 output_asm_insn ("fldcw\t%3", operands
);
14437 strcat (buf
, p
+ !(stack_top_dies
|| dimode_p
));
14439 output_asm_insn (buf
, operands
);
14441 if (round_mode
!= I387_CW_ANY
)
14442 output_asm_insn ("fldcw\t%2", operands
);
14447 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14448 have the values zero or one, indicates the ffreep insn's operand
14449 from the OPERANDS array. */
14451 static const char *
14452 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
14454 if (TARGET_USE_FFREEP
)
14455 #ifdef HAVE_AS_IX86_FFREEP
14456 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
14459 static char retval
[32];
14460 int regno
= REGNO (operands
[opno
]);
14462 gcc_assert (STACK_REGNO_P (regno
));
14464 regno
-= FIRST_STACK_REG
;
14466 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
14471 return opno
? "fstp\t%y1" : "fstp\t%y0";
14475 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14476 should be used. UNORDERED_P is true when fucom should be used. */
14479 output_fp_compare (rtx_insn
*insn
, rtx
*operands
,
14480 bool eflags_p
, bool unordered_p
)
14482 rtx
*xops
= eflags_p
? &operands
[0] : &operands
[1];
14483 bool stack_top_dies
;
14485 static char buf
[40];
14488 gcc_assert (STACK_TOP_P (xops
[0]));
14490 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
);
14494 p
= unordered_p
? "fucomi" : "fcomi";
14497 p
= "p\t{%y1, %0|%0, %y1}";
14498 strcat (buf
, p
+ !stack_top_dies
);
14503 if (STACK_REG_P (xops
[1])
14505 && find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
+ 1))
14507 gcc_assert (REGNO (xops
[1]) == FIRST_STACK_REG
+ 1);
14509 /* If both the top of the 387 stack die, and the other operand
14510 is also a stack register that dies, then this must be a
14511 `fcompp' float compare. */
14512 p
= unordered_p
? "fucompp" : "fcompp";
14515 else if (const0_operand (xops
[1], VOIDmode
))
14517 gcc_assert (!unordered_p
);
14518 strcpy (buf
, "ftst");
14522 if (GET_MODE_CLASS (GET_MODE (xops
[1])) == MODE_INT
)
14524 gcc_assert (!unordered_p
);
14528 p
= unordered_p
? "fucom" : "fcom";
14533 strcat (buf
, p
+ !stack_top_dies
);
14536 output_asm_insn (buf
, operands
);
14537 return "fnstsw\t%0";
14541 ix86_output_addr_vec_elt (FILE *file
, int value
)
14543 const char *directive
= ASM_LONG
;
14547 directive
= ASM_QUAD
;
14549 gcc_assert (!TARGET_64BIT
);
14552 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
14556 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
14558 const char *directive
= ASM_LONG
;
14561 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
14562 directive
= ASM_QUAD
;
14564 gcc_assert (!TARGET_64BIT
);
14566 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14567 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
14568 fprintf (file
, "%s%s%d-%s%d\n",
14569 directive
, LPREFIX
, value
, LPREFIX
, rel
);
14571 else if (TARGET_MACHO
)
14573 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
14574 machopic_output_function_base_name (file
);
14578 else if (HAVE_AS_GOTOFF_IN_DATA
)
14579 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
14581 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
14582 GOT_SYMBOL_NAME
, LPREFIX
, value
);
14585 #define LEA_MAX_STALL (3)
14586 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
14588 /* Increase given DISTANCE in half-cycles according to
14589 dependencies between PREV and NEXT instructions.
14590 Add 1 half-cycle if there is no dependency and
14591 go to next cycle if there is some dependecy. */
14593 static unsigned int
14594 increase_distance (rtx_insn
*prev
, rtx_insn
*next
, unsigned int distance
)
14598 if (!prev
|| !next
)
14599 return distance
+ (distance
& 1) + 2;
14601 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
14602 return distance
+ 1;
14604 FOR_EACH_INSN_USE (use
, next
)
14605 FOR_EACH_INSN_DEF (def
, prev
)
14606 if (!DF_REF_IS_ARTIFICIAL (def
)
14607 && DF_REF_REGNO (use
) == DF_REF_REGNO (def
))
14608 return distance
+ (distance
& 1) + 2;
14610 return distance
+ 1;
14613 /* Function checks if instruction INSN defines register number
14614 REGNO1 or REGNO2. */
14617 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
14622 FOR_EACH_INSN_DEF (def
, insn
)
14623 if (DF_REF_REG_DEF_P (def
)
14624 && !DF_REF_IS_ARTIFICIAL (def
)
14625 && (regno1
== DF_REF_REGNO (def
)
14626 || regno2
== DF_REF_REGNO (def
)))
14632 /* Function checks if instruction INSN uses register number
14633 REGNO as a part of address expression. */
14636 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
14640 FOR_EACH_INSN_USE (use
, insn
)
14641 if (DF_REF_REG_MEM_P (use
) && regno
== DF_REF_REGNO (use
))
14647 /* Search backward for non-agu definition of register number REGNO1
14648 or register number REGNO2 in basic block starting from instruction
14649 START up to head of basic block or instruction INSN.
14651 Function puts true value into *FOUND var if definition was found
14652 and false otherwise.
14654 Distance in half-cycles between START and found instruction or head
14655 of BB is added to DISTANCE and returned. */
14658 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
14659 rtx_insn
*insn
, int distance
,
14660 rtx_insn
*start
, bool *found
)
14662 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
14663 rtx_insn
*prev
= start
;
14664 rtx_insn
*next
= NULL
;
14670 && distance
< LEA_SEARCH_THRESHOLD
)
14672 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
14674 distance
= increase_distance (prev
, next
, distance
);
14675 if (insn_defines_reg (regno1
, regno2
, prev
))
14677 if (recog_memoized (prev
) < 0
14678 || get_attr_type (prev
) != TYPE_LEA
)
14687 if (prev
== BB_HEAD (bb
))
14690 prev
= PREV_INSN (prev
);
14696 /* Search backward for non-agu definition of register number REGNO1
14697 or register number REGNO2 in INSN's basic block until
14698 1. Pass LEA_SEARCH_THRESHOLD instructions, or
14699 2. Reach neighbor BBs boundary, or
14700 3. Reach agu definition.
14701 Returns the distance between the non-agu definition point and INSN.
14702 If no definition point, returns -1. */
14705 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
14708 basic_block bb
= BLOCK_FOR_INSN (insn
);
14710 bool found
= false;
14712 if (insn
!= BB_HEAD (bb
))
14713 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
14714 distance
, PREV_INSN (insn
),
14717 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
14721 bool simple_loop
= false;
14723 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
14726 simple_loop
= true;
14731 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
14733 BB_END (bb
), &found
);
14736 int shortest_dist
= -1;
14737 bool found_in_bb
= false;
14739 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
14742 = distance_non_agu_define_in_bb (regno1
, regno2
,
14748 if (shortest_dist
< 0)
14749 shortest_dist
= bb_dist
;
14750 else if (bb_dist
> 0)
14751 shortest_dist
= MIN (bb_dist
, shortest_dist
);
14757 distance
= shortest_dist
;
14761 /* get_attr_type may modify recog data. We want to make sure
14762 that recog data is valid for instruction INSN, on which
14763 distance_non_agu_define is called. INSN is unchanged here. */
14764 extract_insn_cached (insn
);
14769 return distance
>> 1;
14772 /* Return the distance in half-cycles between INSN and the next
14773 insn that uses register number REGNO in memory address added
14774 to DISTANCE. Return -1 if REGNO0 is set.
14776 Put true value into *FOUND if register usage was found and
14778 Put true value into *REDEFINED if register redefinition was
14779 found and false otherwise. */
14782 distance_agu_use_in_bb (unsigned int regno
,
14783 rtx_insn
*insn
, int distance
, rtx_insn
*start
,
14784 bool *found
, bool *redefined
)
14786 basic_block bb
= NULL
;
14787 rtx_insn
*next
= start
;
14788 rtx_insn
*prev
= NULL
;
14791 *redefined
= false;
14793 if (start
!= NULL_RTX
)
14795 bb
= BLOCK_FOR_INSN (start
);
14796 if (start
!= BB_HEAD (bb
))
14797 /* If insn and start belong to the same bb, set prev to insn,
14798 so the call to increase_distance will increase the distance
14799 between insns by 1. */
14805 && distance
< LEA_SEARCH_THRESHOLD
)
14807 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
14809 distance
= increase_distance(prev
, next
, distance
);
14810 if (insn_uses_reg_mem (regno
, next
))
14812 /* Return DISTANCE if OP0 is used in memory
14813 address in NEXT. */
14818 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
14820 /* Return -1 if OP0 is set in NEXT. */
14828 if (next
== BB_END (bb
))
14831 next
= NEXT_INSN (next
);
14837 /* Return the distance between INSN and the next insn that uses
14838 register number REGNO0 in memory address. Return -1 if no such
14839 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
14842 distance_agu_use (unsigned int regno0
, rtx_insn
*insn
)
14844 basic_block bb
= BLOCK_FOR_INSN (insn
);
14846 bool found
= false;
14847 bool redefined
= false;
14849 if (insn
!= BB_END (bb
))
14850 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
14852 &found
, &redefined
);
14854 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
14858 bool simple_loop
= false;
14860 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
14863 simple_loop
= true;
14868 distance
= distance_agu_use_in_bb (regno0
, insn
,
14869 distance
, BB_HEAD (bb
),
14870 &found
, &redefined
);
14873 int shortest_dist
= -1;
14874 bool found_in_bb
= false;
14875 bool redefined_in_bb
= false;
14877 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
14880 = distance_agu_use_in_bb (regno0
, insn
,
14881 distance
, BB_HEAD (e
->dest
),
14882 &found_in_bb
, &redefined_in_bb
);
14885 if (shortest_dist
< 0)
14886 shortest_dist
= bb_dist
;
14887 else if (bb_dist
> 0)
14888 shortest_dist
= MIN (bb_dist
, shortest_dist
);
14894 distance
= shortest_dist
;
14898 if (!found
|| redefined
)
14901 return distance
>> 1;
14904 /* Define this macro to tune LEA priority vs ADD, it take effect when
14905 there is a dilemma of choosing LEA or ADD
14906 Negative value: ADD is more preferred than LEA
14908 Positive value: LEA is more preferred than ADD. */
14909 #define IX86_LEA_PRIORITY 0
14911 /* Return true if usage of lea INSN has performance advantage
14912 over a sequence of instructions. Instructions sequence has
14913 SPLIT_COST cycles higher latency than lea latency. */
14916 ix86_lea_outperforms (rtx_insn
*insn
, unsigned int regno0
, unsigned int regno1
,
14917 unsigned int regno2
, int split_cost
, bool has_scale
)
14919 int dist_define
, dist_use
;
14921 /* For Atom processors newer than Bonnell, if using a 2-source or
14922 3-source LEA for non-destructive destination purposes, or due to
14923 wanting ability to use SCALE, the use of LEA is justified. */
14924 if (!TARGET_BONNELL
)
14928 if (split_cost
< 1)
14930 if (regno0
== regno1
|| regno0
== regno2
)
14935 rtx_insn
*rinsn
= recog_data
.insn
;
14937 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
14938 dist_use
= distance_agu_use (regno0
, insn
);
14940 /* distance_non_agu_define can call extract_insn_cached. If this function
14941 is called from define_split conditions, that can break insn splitting,
14942 because split_insns works by clearing recog_data.insn and then modifying
14943 recog_data.operand array and match the various split conditions. */
14944 if (recog_data
.insn
!= rinsn
)
14945 recog_data
.insn
= NULL
;
14947 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
14949 /* If there is no non AGU operand definition, no AGU
14950 operand usage and split cost is 0 then both lea
14951 and non lea variants have same priority. Currently
14952 we prefer lea for 64 bit code and non lea on 32 bit
14954 if (dist_use
< 0 && split_cost
== 0)
14955 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
14960 /* With longer definitions distance lea is more preferable.
14961 Here we change it to take into account splitting cost and
14963 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
14965 /* If there is no use in memory addess then we just check
14966 that split cost exceeds AGU stall. */
14968 return dist_define
> LEA_MAX_STALL
;
14970 /* If this insn has both backward non-agu dependence and forward
14971 agu dependence, the one with short distance takes effect. */
14972 return dist_define
>= dist_use
;
14975 /* Return true if it is legal to clobber flags by INSN and
14976 false otherwise. */
14979 ix86_ok_to_clobber_flags (rtx_insn
*insn
)
14981 basic_block bb
= BLOCK_FOR_INSN (insn
);
14987 if (NONDEBUG_INSN_P (insn
))
14989 FOR_EACH_INSN_USE (use
, insn
)
14990 if (DF_REF_REG_USE_P (use
) && DF_REF_REGNO (use
) == FLAGS_REG
)
14993 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
14997 if (insn
== BB_END (bb
))
15000 insn
= NEXT_INSN (insn
);
15003 live
= df_get_live_out(bb
);
15004 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
15007 /* Return true if we need to split op0 = op1 + op2 into a sequence of
15008 move and add to avoid AGU stalls. */
15011 ix86_avoid_lea_for_add (rtx_insn
*insn
, rtx operands
[])
15013 unsigned int regno0
, regno1
, regno2
;
15015 /* Check if we need to optimize. */
15016 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
15019 /* Check it is correct to split here. */
15020 if (!ix86_ok_to_clobber_flags(insn
))
15023 regno0
= true_regnum (operands
[0]);
15024 regno1
= true_regnum (operands
[1]);
15025 regno2
= true_regnum (operands
[2]);
15027 /* We need to split only adds with non destructive
15028 destination operand. */
15029 if (regno0
== regno1
|| regno0
== regno2
)
15032 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1, false);
15035 /* Return true if we should emit lea instruction instead of mov
15039 ix86_use_lea_for_mov (rtx_insn
*insn
, rtx operands
[])
15041 unsigned int regno0
, regno1
;
15043 /* Check if we need to optimize. */
15044 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
15047 /* Use lea for reg to reg moves only. */
15048 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
15051 regno0
= true_regnum (operands
[0]);
15052 regno1
= true_regnum (operands
[1]);
15054 return ix86_lea_outperforms (insn
, regno0
, regno1
, INVALID_REGNUM
, 0, false);
15057 /* Return true if we need to split lea into a sequence of
15058 instructions to avoid AGU stalls. */
15061 ix86_avoid_lea_for_addr (rtx_insn
*insn
, rtx operands
[])
15063 unsigned int regno0
, regno1
, regno2
;
15065 struct ix86_address parts
;
15068 /* The "at least two components" test below might not catch simple
15069 move or zero extension insns if parts.base is non-NULL and parts.disp
15070 is const0_rtx as the only components in the address, e.g. if the
15071 register is %rbp or %r13. As this test is much cheaper and moves or
15072 zero extensions are the common case, do this check first. */
15073 if (REG_P (operands
[1])
15074 || (SImode_address_operand (operands
[1], VOIDmode
)
15075 && REG_P (XEXP (operands
[1], 0))))
15078 /* Check if it is OK to split here. */
15079 if (!ix86_ok_to_clobber_flags (insn
))
15082 ok
= ix86_decompose_address (operands
[1], &parts
);
15085 /* There should be at least two components in the address. */
15086 if ((parts
.base
!= NULL_RTX
) + (parts
.index
!= NULL_RTX
)
15087 + (parts
.disp
!= NULL_RTX
) + (parts
.scale
> 1) < 2)
15090 /* We should not split into add if non legitimate pic
15091 operand is used as displacement. */
15092 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
15095 regno0
= true_regnum (operands
[0]) ;
15096 regno1
= INVALID_REGNUM
;
15097 regno2
= INVALID_REGNUM
;
15100 regno1
= true_regnum (parts
.base
);
15102 regno2
= true_regnum (parts
.index
);
15104 /* Use add for a = a + b and a = b + a since it is faster and shorter
15105 than lea for most processors. For the processors like BONNELL, if
15106 the destination register of LEA holds an actual address which will
15107 be used soon, LEA is better and otherwise ADD is better. */
15108 if (!TARGET_BONNELL
15109 && parts
.scale
== 1
15110 && (!parts
.disp
|| parts
.disp
== const0_rtx
)
15111 && (regno0
== regno1
|| regno0
== regno2
))
15114 /* Check we need to optimize. */
15115 if (!TARGET_AVOID_LEA_FOR_ADDR
|| optimize_function_for_size_p (cfun
))
15120 /* Compute how many cycles we will add to execution time
15121 if split lea into a sequence of instructions. */
15122 if (parts
.base
|| parts
.index
)
15124 /* Have to use mov instruction if non desctructive
15125 destination form is used. */
15126 if (regno1
!= regno0
&& regno2
!= regno0
)
15129 /* Have to add index to base if both exist. */
15130 if (parts
.base
&& parts
.index
)
15133 /* Have to use shift and adds if scale is 2 or greater. */
15134 if (parts
.scale
> 1)
15136 if (regno0
!= regno1
)
15138 else if (regno2
== regno0
)
15141 split_cost
+= parts
.scale
;
15144 /* Have to use add instruction with immediate if
15145 disp is non zero. */
15146 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
15149 /* Subtract the price of lea. */
15153 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
,
15157 /* Return true if it is ok to optimize an ADD operation to LEA
15158 operation to avoid flag register consumation. For most processors,
15159 ADD is faster than LEA. For the processors like BONNELL, if the
15160 destination register of LEA holds an actual address which will be
15161 used soon, LEA is better and otherwise ADD is better. */
15164 ix86_lea_for_add_ok (rtx_insn
*insn
, rtx operands
[])
15166 unsigned int regno0
= true_regnum (operands
[0]);
15167 unsigned int regno1
= true_regnum (operands
[1]);
15168 unsigned int regno2
= true_regnum (operands
[2]);
15170 /* If a = b + c, (a!=b && a!=c), must use lea form. */
15171 if (regno0
!= regno1
&& regno0
!= regno2
)
15174 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
15177 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0, false);
15180 /* Return true if destination reg of SET_BODY is shift count of
15184 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
15190 /* Retrieve destination of SET_BODY. */
15191 switch (GET_CODE (set_body
))
15194 set_dest
= SET_DEST (set_body
);
15195 if (!set_dest
|| !REG_P (set_dest
))
15199 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
15200 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
15208 /* Retrieve shift count of USE_BODY. */
15209 switch (GET_CODE (use_body
))
15212 shift_rtx
= XEXP (use_body
, 1);
15215 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
15216 if (ix86_dep_by_shift_count_body (set_body
,
15217 XVECEXP (use_body
, 0, i
)))
15225 && (GET_CODE (shift_rtx
) == ASHIFT
15226 || GET_CODE (shift_rtx
) == LSHIFTRT
15227 || GET_CODE (shift_rtx
) == ASHIFTRT
15228 || GET_CODE (shift_rtx
) == ROTATE
15229 || GET_CODE (shift_rtx
) == ROTATERT
))
15231 rtx shift_count
= XEXP (shift_rtx
, 1);
15233 /* Return true if shift count is dest of SET_BODY. */
15234 if (REG_P (shift_count
))
15236 /* Add check since it can be invoked before register
15237 allocation in pre-reload schedule. */
15238 if (reload_completed
15239 && true_regnum (set_dest
) == true_regnum (shift_count
))
15241 else if (REGNO(set_dest
) == REGNO(shift_count
))
15249 /* Return true if destination reg of SET_INSN is shift count of
15253 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
15255 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
15256 PATTERN (use_insn
));
15259 /* Return TRUE or FALSE depending on whether the unary operator meets the
15260 appropriate constraints. */
15263 ix86_unary_operator_ok (enum rtx_code
,
15267 /* If one of operands is memory, source and destination must match. */
15268 if ((MEM_P (operands
[0])
15269 || MEM_P (operands
[1]))
15270 && ! rtx_equal_p (operands
[0], operands
[1]))
15275 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
15276 are ok, keeping in mind the possible movddup alternative. */
15279 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
15281 if (MEM_P (operands
[0]))
15282 return rtx_equal_p (operands
[0], operands
[1 + high
]);
15283 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
15284 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
15288 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15289 then replicate the value for all elements of the vector
15293 ix86_build_const_vector (machine_mode mode
, bool vect
, rtx value
)
15297 machine_mode scalar_mode
;
15322 n_elt
= GET_MODE_NUNITS (mode
);
15323 v
= rtvec_alloc (n_elt
);
15324 scalar_mode
= GET_MODE_INNER (mode
);
15326 RTVEC_ELT (v
, 0) = value
;
15328 for (i
= 1; i
< n_elt
; ++i
)
15329 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
15331 return gen_rtx_CONST_VECTOR (mode
, v
);
15334 gcc_unreachable ();
15338 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15339 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15340 for an SSE register. If VECT is true, then replicate the mask for
15341 all elements of the vector register. If INVERT is true, then create
15342 a mask excluding the sign bit. */
15345 ix86_build_signbit_mask (machine_mode mode
, bool vect
, bool invert
)
15347 machine_mode vec_mode
, imode
;
15376 vec_mode
= VOIDmode
;
15381 gcc_unreachable ();
15384 machine_mode inner_mode
= GET_MODE_INNER (mode
);
15385 w
= wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode
) - 1,
15386 GET_MODE_BITSIZE (inner_mode
));
15388 w
= wi::bit_not (w
);
15390 /* Force this value into the low part of a fp vector constant. */
15391 mask
= immed_wide_int_const (w
, imode
);
15392 mask
= gen_lowpart (inner_mode
, mask
);
15394 if (vec_mode
== VOIDmode
)
15395 return force_reg (inner_mode
, mask
);
15397 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
15398 return force_reg (vec_mode
, v
);
15401 /* Return TRUE or FALSE depending on whether the first SET in INSN
15402 has source and destination with matching CC modes, and that the
15403 CC mode is at least as constrained as REQ_MODE. */
15406 ix86_match_ccmode (rtx insn
, machine_mode req_mode
)
15409 machine_mode set_mode
;
15411 set
= PATTERN (insn
);
15412 if (GET_CODE (set
) == PARALLEL
)
15413 set
= XVECEXP (set
, 0, 0);
15414 gcc_assert (GET_CODE (set
) == SET
);
15415 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
15417 set_mode
= GET_MODE (SET_DEST (set
));
15421 if (req_mode
!= CCNOmode
15422 && (req_mode
!= CCmode
15423 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
15427 if (req_mode
== CCGCmode
)
15431 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
15435 if (req_mode
== CCZmode
)
15448 if (set_mode
!= req_mode
)
15453 gcc_unreachable ();
15456 return GET_MODE (SET_SRC (set
)) == set_mode
;
15460 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
15462 machine_mode mode
= GET_MODE (op0
);
15464 if (SCALAR_FLOAT_MODE_P (mode
))
15466 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
15472 /* Only zero flag is needed. */
15473 case EQ
: /* ZF=0 */
15474 case NE
: /* ZF!=0 */
15476 /* Codes needing carry flag. */
15477 case GEU
: /* CF=0 */
15478 case LTU
: /* CF=1 */
15480 /* Detect overflow checks. They need just the carry flag. */
15481 if (GET_CODE (op0
) == PLUS
15482 && (rtx_equal_p (op1
, XEXP (op0
, 0))
15483 || rtx_equal_p (op1
, XEXP (op0
, 1))))
15485 /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
15487 (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
15489 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
15490 where CC_CCC is either CC or CCC. */
15491 else if (code
== LTU
15492 && GET_CODE (op0
) == NEG
15493 && GET_CODE (geu
= XEXP (op0
, 0)) == GEU
15494 && REG_P (XEXP (geu
, 0))
15495 && (GET_MODE (XEXP (geu
, 0)) == CCCmode
15496 || GET_MODE (XEXP (geu
, 0)) == CCmode
)
15497 && REGNO (XEXP (geu
, 0)) == FLAGS_REG
15498 && XEXP (geu
, 1) == const0_rtx
15499 && GET_CODE (op1
) == LTU
15500 && REG_P (XEXP (op1
, 0))
15501 && GET_MODE (XEXP (op1
, 0)) == GET_MODE (XEXP (geu
, 0))
15502 && REGNO (XEXP (op1
, 0)) == FLAGS_REG
15503 && XEXP (op1
, 1) == const0_rtx
)
15507 case GTU
: /* CF=0 & ZF=0 */
15508 case LEU
: /* CF=1 | ZF=1 */
15510 /* Codes possibly doable only with sign flag when
15511 comparing against zero. */
15512 case GE
: /* SF=OF or SF=0 */
15513 case LT
: /* SF<>OF or SF=1 */
15514 if (op1
== const0_rtx
)
15517 /* For other cases Carry flag is not required. */
15519 /* Codes doable only with sign flag when comparing
15520 against zero, but we miss jump instruction for it
15521 so we need to use relational tests against overflow
15522 that thus needs to be zero. */
15523 case GT
: /* ZF=0 & SF=OF */
15524 case LE
: /* ZF=1 | SF<>OF */
15525 if (op1
== const0_rtx
)
15529 /* strcmp pattern do (use flags) and combine may ask us for proper
15534 gcc_unreachable ();
15538 /* Return the fixed registers used for condition codes. */
15541 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
15544 *p2
= INVALID_REGNUM
;
15548 /* If two condition code modes are compatible, return a condition code
15549 mode which is compatible with both. Otherwise, return
15552 static machine_mode
15553 ix86_cc_modes_compatible (machine_mode m1
, machine_mode m2
)
15558 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
15561 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
15562 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
15565 if ((m1
== CCNOmode
&& m2
== CCGOCmode
)
15566 || (m1
== CCGOCmode
&& m2
== CCNOmode
))
15570 && (m2
== CCGCmode
|| m2
== CCGOCmode
|| m2
== CCNOmode
))
15572 else if (m2
== CCZmode
15573 && (m1
== CCGCmode
|| m1
== CCGOCmode
|| m1
== CCNOmode
))
15579 gcc_unreachable ();
15610 /* These are only compatible with themselves, which we already
15616 /* Return strategy to use for floating-point. We assume that fcomi is always
15617 preferrable where available, since that is also true when looking at size
15618 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
15620 enum ix86_fpcmp_strategy
15621 ix86_fp_comparison_strategy (enum rtx_code
)
15623 /* Do fcomi/sahf based test when profitable. */
15626 return IX86_FPCMP_COMI
;
15628 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
15629 return IX86_FPCMP_SAHF
;
15631 return IX86_FPCMP_ARITH
;
15634 /* Convert comparison codes we use to represent FP comparison to integer
15635 code that will result in proper branch. Return UNKNOWN if no such code
15639 ix86_fp_compare_code_to_integer (enum rtx_code code
)
15663 /* Zero extend possibly SImode EXP to Pmode register. */
15665 ix86_zero_extend_to_Pmode (rtx exp
)
15667 return force_reg (Pmode
, convert_to_mode (Pmode
, exp
, 1));
15670 /* Return true if the function being called was marked with attribute
15671 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
15672 to handle the non-PIC case in the backend because there is no easy
15673 interface for the front-end to force non-PLT calls to use the GOT.
15674 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
15675 to call the function marked "noplt" indirectly. */
15678 ix86_nopic_noplt_attribute_p (rtx call_op
)
15680 if (flag_pic
|| ix86_cmodel
== CM_LARGE
15681 || !(TARGET_64BIT
|| HAVE_AS_IX86_GOT32X
)
15682 || TARGET_MACHO
|| TARGET_SEH
|| TARGET_PECOFF
15683 || SYMBOL_REF_LOCAL_P (call_op
))
15686 tree symbol_decl
= SYMBOL_REF_DECL (call_op
);
15689 || (symbol_decl
!= NULL_TREE
15690 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl
))))
15696 /* Helper to output the jmp/call. */
15698 ix86_output_jmp_thunk_or_indirect (const char *thunk_name
, const int regno
)
15700 if (thunk_name
!= NULL
)
15702 fprintf (asm_out_file
, "\tjmp\t");
15703 assemble_name (asm_out_file
, thunk_name
);
15704 putc ('\n', asm_out_file
);
15707 output_indirect_thunk (regno
);
15710 /* Output indirect branch via a call and return thunk. CALL_OP is a
15711 register which contains the branch target. XASM is the assembly
15712 template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
15713 A normal call is converted to:
15715 call __x86_indirect_thunk_reg
15717 and a tail call is converted to:
15719 jmp __x86_indirect_thunk_reg
15723 ix86_output_indirect_branch_via_reg (rtx call_op
, bool sibcall_p
)
15725 char thunk_name_buf
[32];
15727 enum indirect_thunk_prefix need_prefix
15728 = indirect_thunk_need_prefix (current_output_insn
);
15729 int regno
= REGNO (call_op
);
15731 if (cfun
->machine
->indirect_branch_type
15732 != indirect_branch_thunk_inline
)
15734 if (cfun
->machine
->indirect_branch_type
== indirect_branch_thunk
)
15737 if (i
>= FIRST_REX_INT_REG
)
15738 i
-= (FIRST_REX_INT_REG
- LAST_INT_REG
- 1);
15739 indirect_thunks_used
|= 1 << i
;
15741 indirect_thunk_name (thunk_name_buf
, regno
, need_prefix
, false);
15742 thunk_name
= thunk_name_buf
;
15748 ix86_output_jmp_thunk_or_indirect (thunk_name
, regno
);
15751 if (thunk_name
!= NULL
)
15753 fprintf (asm_out_file
, "\tcall\t");
15754 assemble_name (asm_out_file
, thunk_name
);
15755 putc ('\n', asm_out_file
);
15759 char indirectlabel1
[32];
15760 char indirectlabel2
[32];
15762 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1
,
15764 indirectlabelno
++);
15765 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2
,
15767 indirectlabelno
++);
15770 fputs ("\tjmp\t", asm_out_file
);
15771 assemble_name_raw (asm_out_file
, indirectlabel2
);
15772 fputc ('\n', asm_out_file
);
15774 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel1
);
15776 ix86_output_jmp_thunk_or_indirect (thunk_name
, regno
);
15778 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel2
);
15781 fputs ("\tcall\t", asm_out_file
);
15782 assemble_name_raw (asm_out_file
, indirectlabel1
);
15783 fputc ('\n', asm_out_file
);
15787 /* Output indirect branch via a call and return thunk. CALL_OP is
15788 the branch target. XASM is the assembly template for CALL_OP.
15789 Branch is a tail call if SIBCALL_P is true. A normal call is
15795 jmp __x86_indirect_thunk
15799 and a tail call is converted to:
15802 jmp __x86_indirect_thunk
15806 ix86_output_indirect_branch_via_push (rtx call_op
, const char *xasm
,
15809 char thunk_name_buf
[32];
15812 enum indirect_thunk_prefix need_prefix
15813 = indirect_thunk_need_prefix (current_output_insn
);
15816 if (cfun
->machine
->indirect_branch_type
15817 != indirect_branch_thunk_inline
)
15819 if (cfun
->machine
->indirect_branch_type
== indirect_branch_thunk
)
15820 indirect_thunk_needed
= true;
15821 indirect_thunk_name (thunk_name_buf
, regno
, need_prefix
, false);
15822 thunk_name
= thunk_name_buf
;
15827 snprintf (push_buf
, sizeof (push_buf
), "push{%c}\t%s",
15828 TARGET_64BIT
? 'q' : 'l', xasm
);
15832 output_asm_insn (push_buf
, &call_op
);
15833 ix86_output_jmp_thunk_or_indirect (thunk_name
, regno
);
15837 char indirectlabel1
[32];
15838 char indirectlabel2
[32];
15840 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1
,
15842 indirectlabelno
++);
15843 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2
,
15845 indirectlabelno
++);
15848 fputs ("\tjmp\t", asm_out_file
);
15849 assemble_name_raw (asm_out_file
, indirectlabel2
);
15850 fputc ('\n', asm_out_file
);
15852 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel1
);
15854 /* An external function may be called via GOT, instead of PLT. */
15855 if (MEM_P (call_op
))
15857 struct ix86_address parts
;
15858 rtx addr
= XEXP (call_op
, 0);
15859 if (ix86_decompose_address (addr
, &parts
)
15860 && parts
.base
== stack_pointer_rtx
)
15862 /* Since call will adjust stack by -UNITS_PER_WORD,
15863 we must convert "disp(stack, index, scale)" to
15864 "disp+UNITS_PER_WORD(stack, index, scale)". */
15867 addr
= gen_rtx_MULT (Pmode
, parts
.index
,
15868 GEN_INT (parts
.scale
));
15869 addr
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
15873 addr
= stack_pointer_rtx
;
15876 if (parts
.disp
!= NULL_RTX
)
15877 disp
= plus_constant (Pmode
, parts
.disp
,
15880 disp
= GEN_INT (UNITS_PER_WORD
);
15882 addr
= gen_rtx_PLUS (Pmode
, addr
, disp
);
15883 call_op
= gen_rtx_MEM (GET_MODE (call_op
), addr
);
15887 output_asm_insn (push_buf
, &call_op
);
15889 ix86_output_jmp_thunk_or_indirect (thunk_name
, regno
);
15891 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel2
);
15894 fputs ("\tcall\t", asm_out_file
);
15895 assemble_name_raw (asm_out_file
, indirectlabel1
);
15896 fputc ('\n', asm_out_file
);
15900 /* Output indirect branch via a call and return thunk. CALL_OP is
15901 the branch target. XASM is the assembly template for CALL_OP.
15902 Branch is a tail call if SIBCALL_P is true. */
15905 ix86_output_indirect_branch (rtx call_op
, const char *xasm
,
15908 if (REG_P (call_op
))
15909 ix86_output_indirect_branch_via_reg (call_op
, sibcall_p
);
15911 ix86_output_indirect_branch_via_push (call_op
, xasm
, sibcall_p
);
15914 /* Output indirect jump. CALL_OP is the jump target. */
15917 ix86_output_indirect_jmp (rtx call_op
)
15919 if (cfun
->machine
->indirect_branch_type
!= indirect_branch_keep
)
15921 /* We can't have red-zone since "call" in the indirect thunk
15922 pushes the return address onto stack, destroying red-zone. */
15923 if (ix86_red_zone_size
!= 0)
15924 gcc_unreachable ();
15926 ix86_output_indirect_branch (call_op
, "%0", true);
15930 return "%!jmp\t%A0";
15933 /* Output return instrumentation for current function if needed. */
15936 output_return_instrumentation (void)
15938 if (ix86_instrument_return
!= instrument_return_none
15940 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun
->decl
))
15942 if (ix86_flag_record_return
)
15943 fprintf (asm_out_file
, "1:\n");
15944 switch (ix86_instrument_return
)
15946 case instrument_return_call
:
15947 fprintf (asm_out_file
, "\tcall\t__return__\n");
15949 case instrument_return_nop5
:
15950 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
15951 fprintf (asm_out_file
, ASM_BYTE
"0x0f, 0x1f, 0x44, 0x00, 0x00\n");
15953 case instrument_return_none
:
15957 if (ix86_flag_record_return
)
15959 fprintf (asm_out_file
, "\t.section __return_loc, \"a\",@progbits\n");
15960 fprintf (asm_out_file
, "\t.%s 1b\n", TARGET_64BIT
? "quad" : "long");
15961 fprintf (asm_out_file
, "\t.previous\n");
15966 /* Output function return. CALL_OP is the jump target. Add a REP
15967 prefix to RET if LONG_P is true and function return is kept. */
15970 ix86_output_function_return (bool long_p
)
15972 output_return_instrumentation ();
15974 if (cfun
->machine
->function_return_type
!= indirect_branch_keep
)
15976 char thunk_name
[32];
15977 enum indirect_thunk_prefix need_prefix
15978 = indirect_thunk_need_prefix (current_output_insn
);
15980 if (cfun
->machine
->function_return_type
15981 != indirect_branch_thunk_inline
)
15983 bool need_thunk
= (cfun
->machine
->function_return_type
15984 == indirect_branch_thunk
);
15985 indirect_thunk_name (thunk_name
, INVALID_REGNUM
, need_prefix
,
15987 indirect_return_needed
|= need_thunk
;
15988 fprintf (asm_out_file
, "\tjmp\t");
15989 assemble_name (asm_out_file
, thunk_name
);
15990 putc ('\n', asm_out_file
);
15993 output_indirect_thunk (INVALID_REGNUM
);
16001 return "rep%; ret";
16004 /* Output indirect function return. RET_OP is the function return
16008 ix86_output_indirect_function_return (rtx ret_op
)
16010 if (cfun
->machine
->function_return_type
!= indirect_branch_keep
)
16012 char thunk_name
[32];
16013 enum indirect_thunk_prefix need_prefix
16014 = indirect_thunk_need_prefix (current_output_insn
);
16015 unsigned int regno
= REGNO (ret_op
);
16016 gcc_assert (regno
== CX_REG
);
16018 if (cfun
->machine
->function_return_type
16019 != indirect_branch_thunk_inline
)
16021 bool need_thunk
= (cfun
->machine
->function_return_type
16022 == indirect_branch_thunk
);
16023 indirect_thunk_name (thunk_name
, regno
, need_prefix
, true);
16027 indirect_return_via_cx
= true;
16028 indirect_thunks_used
|= 1 << CX_REG
;
16030 fprintf (asm_out_file
, "\tjmp\t");
16031 assemble_name (asm_out_file
, thunk_name
);
16032 putc ('\n', asm_out_file
);
16035 output_indirect_thunk (regno
);
16040 return "%!jmp\t%A0";
16043 /* Output the assembly for a call instruction. */
16046 ix86_output_call_insn (rtx_insn
*insn
, rtx call_op
)
16048 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
16049 bool output_indirect_p
16051 && cfun
->machine
->indirect_branch_type
!= indirect_branch_keep
);
16052 bool seh_nop_p
= false;
16055 if (SIBLING_CALL_P (insn
))
16057 output_return_instrumentation ();
16060 if (ix86_nopic_noplt_attribute_p (call_op
))
16065 if (output_indirect_p
)
16066 xasm
= "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16068 xasm
= "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16072 if (output_indirect_p
)
16073 xasm
= "{%p0@GOT|[DWORD PTR %p0@GOT]}";
16075 xasm
= "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
16079 xasm
= "%!jmp\t%P0";
16081 /* SEH epilogue detection requires the indirect branch case
16082 to include REX.W. */
16083 else if (TARGET_SEH
)
16084 xasm
= "%!rex.W jmp\t%A0";
16087 if (output_indirect_p
)
16090 xasm
= "%!jmp\t%A0";
16093 if (output_indirect_p
&& !direct_p
)
16094 ix86_output_indirect_branch (call_op
, xasm
, true);
16096 output_asm_insn (xasm
, &call_op
);
16100 /* SEH unwinding can require an extra nop to be emitted in several
16101 circumstances. Determine if we have one of those. */
16106 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
16108 /* Prevent a catch region from being adjacent to a jump that would
16109 be interpreted as an epilogue sequence by the unwinder. */
16110 if (JUMP_P(i
) && CROSSING_JUMP_P (i
))
16116 /* If we get to another real insn, we don't need the nop. */
16120 /* If we get to the epilogue note, prevent a catch region from
16121 being adjacent to the standard epilogue sequence. If non-
16122 call-exceptions, we'll have done this during epilogue emission. */
16123 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
16124 && !flag_non_call_exceptions
16125 && !can_throw_internal (insn
))
16132 /* If we didn't find a real insn following the call, prevent the
16133 unwinder from looking into the next function. */
16140 if (ix86_nopic_noplt_attribute_p (call_op
))
16145 if (output_indirect_p
)
16146 xasm
= "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16148 xasm
= "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16152 if (output_indirect_p
)
16153 xasm
= "{%p0@GOT|[DWORD PTR %p0@GOT]}";
16155 xasm
= "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
16159 xasm
= "%!call\t%P0";
16163 if (output_indirect_p
)
16166 xasm
= "%!call\t%A0";
16169 if (output_indirect_p
&& !direct_p
)
16170 ix86_output_indirect_branch (call_op
, xasm
, false);
16172 output_asm_insn (xasm
, &call_op
);
16180 /* Return a MEM corresponding to a stack slot with mode MODE.
16181 Allocate a new slot if necessary.
16183 The RTL for a function can have several slots available: N is
16184 which slot to use. */
16187 assign_386_stack_local (machine_mode mode
, enum ix86_stack_slot n
)
16189 struct stack_local_entry
*s
;
16191 gcc_assert (n
< MAX_386_STACK_LOCALS
);
16193 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
16194 if (s
->mode
== mode
&& s
->n
== n
)
16195 return validize_mem (copy_rtx (s
->rtl
));
16197 s
= ggc_alloc
<stack_local_entry
> ();
16200 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
16202 s
->next
= ix86_stack_locals
;
16203 ix86_stack_locals
= s
;
16204 return validize_mem (copy_rtx (s
->rtl
));
16208 ix86_instantiate_decls (void)
16210 struct stack_local_entry
*s
;
16212 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
16213 if (s
->rtl
!= NULL_RTX
)
16214 instantiate_decl_rtl (s
->rtl
);
16217 /* Check whether x86 address PARTS is a pc-relative address. */
16220 ix86_rip_relative_addr_p (struct ix86_address
*parts
)
16222 rtx base
, index
, disp
;
16224 base
= parts
->base
;
16225 index
= parts
->index
;
16226 disp
= parts
->disp
;
16228 if (disp
&& !base
&& !index
)
16234 if (GET_CODE (disp
) == CONST
)
16235 symbol
= XEXP (disp
, 0);
16236 if (GET_CODE (symbol
) == PLUS
16237 && CONST_INT_P (XEXP (symbol
, 1)))
16238 symbol
= XEXP (symbol
, 0);
16240 if (GET_CODE (symbol
) == LABEL_REF
16241 || (GET_CODE (symbol
) == SYMBOL_REF
16242 && SYMBOL_REF_TLS_MODEL (symbol
) == 0)
16243 || (GET_CODE (symbol
) == UNSPEC
16244 && (XINT (symbol
, 1) == UNSPEC_GOTPCREL
16245 || XINT (symbol
, 1) == UNSPEC_PCREL
16246 || XINT (symbol
, 1) == UNSPEC_GOTNTPOFF
)))
16253 /* Calculate the length of the memory address in the instruction encoding.
16254 Includes addr32 prefix, does not include the one-byte modrm, opcode,
16255 or other prefixes. We never generate addr32 prefix for LEA insn. */
16258 memory_address_length (rtx addr
, bool lea
)
16260 struct ix86_address parts
;
16261 rtx base
, index
, disp
;
16265 if (GET_CODE (addr
) == PRE_DEC
16266 || GET_CODE (addr
) == POST_INC
16267 || GET_CODE (addr
) == PRE_MODIFY
16268 || GET_CODE (addr
) == POST_MODIFY
)
16271 ok
= ix86_decompose_address (addr
, &parts
);
16274 len
= (parts
.seg
== ADDR_SPACE_GENERIC
) ? 0 : 1;
16276 /* If this is not LEA instruction, add the length of addr32 prefix. */
16277 if (TARGET_64BIT
&& !lea
16278 && (SImode_address_operand (addr
, VOIDmode
)
16279 || (parts
.base
&& GET_MODE (parts
.base
) == SImode
)
16280 || (parts
.index
&& GET_MODE (parts
.index
) == SImode
)))
16284 index
= parts
.index
;
16287 if (base
&& SUBREG_P (base
))
16288 base
= SUBREG_REG (base
);
16289 if (index
&& SUBREG_P (index
))
16290 index
= SUBREG_REG (index
);
16292 gcc_assert (base
== NULL_RTX
|| REG_P (base
));
16293 gcc_assert (index
== NULL_RTX
|| REG_P (index
));
16296 - esp as the base always wants an index,
16297 - ebp as the base always wants a displacement,
16298 - r12 as the base always wants an index,
16299 - r13 as the base always wants a displacement. */
16301 /* Register Indirect. */
16302 if (base
&& !index
&& !disp
)
16304 /* esp (for its index) and ebp (for its displacement) need
16305 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
16307 if (base
== arg_pointer_rtx
16308 || base
== frame_pointer_rtx
16309 || REGNO (base
) == SP_REG
16310 || REGNO (base
) == BP_REG
16311 || REGNO (base
) == R12_REG
16312 || REGNO (base
) == R13_REG
)
16316 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
16317 is not disp32, but disp32(%rip), so for disp32
16318 SIB byte is needed, unless print_operand_address
16319 optimizes it into disp32(%rip) or (%rip) is implied
16321 else if (disp
&& !base
&& !index
)
16324 if (!ix86_rip_relative_addr_p (&parts
))
16329 /* Find the length of the displacement constant. */
16332 if (base
&& satisfies_constraint_K (disp
))
16337 /* ebp always wants a displacement. Similarly r13. */
16338 else if (base
&& (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
16341 /* An index requires the two-byte modrm form.... */
16343 /* ...like esp (or r12), which always wants an index. */
16344 || base
== arg_pointer_rtx
16345 || base
== frame_pointer_rtx
16346 || (base
&& (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
16353 /* Compute default value for "length_immediate" attribute. When SHORTFORM
16354 is set, expect that insn have 8bit immediate alternative. */
16356 ix86_attr_length_immediate_default (rtx_insn
*insn
, bool shortform
)
16360 extract_insn_cached (insn
);
16361 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
16362 if (CONSTANT_P (recog_data
.operand
[i
]))
16364 enum attr_mode mode
= get_attr_mode (insn
);
16367 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
16369 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
16376 ival
= trunc_int_for_mode (ival
, HImode
);
16379 ival
= trunc_int_for_mode (ival
, SImode
);
16384 if (IN_RANGE (ival
, -128, 127))
16401 /* Immediates for DImode instructions are encoded
16402 as 32bit sign extended values. */
16407 fatal_insn ("unknown insn mode", insn
);
16413 /* Compute default value for "length_address" attribute. */
16415 ix86_attr_length_address_default (rtx_insn
*insn
)
16419 if (get_attr_type (insn
) == TYPE_LEA
)
16421 rtx set
= PATTERN (insn
), addr
;
16423 if (GET_CODE (set
) == PARALLEL
)
16424 set
= XVECEXP (set
, 0, 0);
16426 gcc_assert (GET_CODE (set
) == SET
);
16428 addr
= SET_SRC (set
);
16430 return memory_address_length (addr
, true);
16433 extract_insn_cached (insn
);
16434 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
16436 rtx op
= recog_data
.operand
[i
];
16439 constrain_operands_cached (insn
, reload_completed
);
16440 if (which_alternative
!= -1)
16442 const char *constraints
= recog_data
.constraints
[i
];
16443 int alt
= which_alternative
;
16445 while (*constraints
== '=' || *constraints
== '+')
16448 while (*constraints
++ != ',')
16450 /* Skip ignored operands. */
16451 if (*constraints
== 'X')
16455 int len
= memory_address_length (XEXP (op
, 0), false);
16457 /* Account for segment prefix for non-default addr spaces. */
16458 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op
)))
16467 /* Compute default value for "length_vex" attribute. It includes
16468 2 or 3 byte VEX prefix and 1 opcode byte. */
16471 ix86_attr_length_vex_default (rtx_insn
*insn
, bool has_0f_opcode
,
16476 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
16477 byte VEX prefix. */
16478 if (!has_0f_opcode
|| has_vex_w
)
16481 /* We can always use 2 byte VEX prefix in 32bit. */
16485 extract_insn_cached (insn
);
16487 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
16488 if (REG_P (recog_data
.operand
[i
]))
16490 /* REX.W bit uses 3 byte VEX prefix. */
16491 if (GET_MODE (recog_data
.operand
[i
]) == DImode
16492 && GENERAL_REG_P (recog_data
.operand
[i
]))
16497 /* REX.X or REX.B bits use 3 byte VEX prefix. */
16498 if (MEM_P (recog_data
.operand
[i
])
16499 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
16508 ix86_class_likely_spilled_p (reg_class_t
);
16510 /* Returns true if lhs of insn is HW function argument register and set up
16511 is_spilled to true if it is likely spilled HW register. */
16513 insn_is_function_arg (rtx insn
, bool* is_spilled
)
16517 if (!NONDEBUG_INSN_P (insn
))
16519 /* Call instructions are not movable, ignore it. */
16522 insn
= PATTERN (insn
);
16523 if (GET_CODE (insn
) == PARALLEL
)
16524 insn
= XVECEXP (insn
, 0, 0);
16525 if (GET_CODE (insn
) != SET
)
16527 dst
= SET_DEST (insn
);
16528 if (REG_P (dst
) && HARD_REGISTER_P (dst
)
16529 && ix86_function_arg_regno_p (REGNO (dst
)))
16531 /* Is it likely spilled HW register? */
16532 if (!TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (dst
))
16533 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst
))))
16534 *is_spilled
= true;
16540 /* Add output dependencies for chain of function adjacent arguments if only
16541 there is a move to likely spilled HW register. Return first argument
16542 if at least one dependence was added or NULL otherwise. */
16544 add_parameter_dependencies (rtx_insn
*call
, rtx_insn
*head
)
16547 rtx_insn
*last
= call
;
16548 rtx_insn
*first_arg
= NULL
;
16549 bool is_spilled
= false;
16551 head
= PREV_INSN (head
);
16553 /* Find nearest to call argument passing instruction. */
16556 last
= PREV_INSN (last
);
16559 if (!NONDEBUG_INSN_P (last
))
16561 if (insn_is_function_arg (last
, &is_spilled
))
16569 insn
= PREV_INSN (last
);
16570 if (!INSN_P (insn
))
16574 if (!NONDEBUG_INSN_P (insn
))
16579 if (insn_is_function_arg (insn
, &is_spilled
))
16581 /* Add output depdendence between two function arguments if chain
16582 of output arguments contains likely spilled HW registers. */
16584 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
16585 first_arg
= last
= insn
;
16595 /* Add output or anti dependency from insn to first_arg to restrict its code
16598 avoid_func_arg_motion (rtx_insn
*first_arg
, rtx_insn
*insn
)
16603 set
= single_set (insn
);
16606 tmp
= SET_DEST (set
);
16609 /* Add output dependency to the first function argument. */
16610 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
16613 /* Add anti dependency. */
16614 add_dependence (first_arg
, insn
, REG_DEP_ANTI
);
16617 /* Avoid cross block motion of function argument through adding dependency
16618 from the first non-jump instruction in bb. */
16620 add_dependee_for_func_arg (rtx_insn
*arg
, basic_block bb
)
16622 rtx_insn
*insn
= BB_END (bb
);
16626 if (NONDEBUG_INSN_P (insn
) && NONJUMP_INSN_P (insn
))
16628 rtx set
= single_set (insn
);
16631 avoid_func_arg_motion (arg
, insn
);
16635 if (insn
== BB_HEAD (bb
))
16637 insn
= PREV_INSN (insn
);
16641 /* Hook for pre-reload schedule - avoid motion of function arguments
16642 passed in likely spilled HW registers. */
16644 ix86_dependencies_evaluation_hook (rtx_insn
*head
, rtx_insn
*tail
)
16647 rtx_insn
*first_arg
= NULL
;
16648 if (reload_completed
)
16650 while (head
!= tail
&& DEBUG_INSN_P (head
))
16651 head
= NEXT_INSN (head
);
16652 for (insn
= tail
; insn
!= head
; insn
= PREV_INSN (insn
))
16653 if (INSN_P (insn
) && CALL_P (insn
))
16655 first_arg
= add_parameter_dependencies (insn
, head
);
16658 /* Add dependee for first argument to predecessors if only
16659 region contains more than one block. */
16660 basic_block bb
= BLOCK_FOR_INSN (insn
);
16661 int rgn
= CONTAINING_RGN (bb
->index
);
16662 int nr_blks
= RGN_NR_BLOCKS (rgn
);
16663 /* Skip trivial regions and region head blocks that can have
16664 predecessors outside of region. */
16665 if (nr_blks
> 1 && BLOCK_TO_BB (bb
->index
) != 0)
16670 /* Regions are SCCs with the exception of selective
16671 scheduling with pipelining of outer blocks enabled.
16672 So also check that immediate predecessors of a non-head
16673 block are in the same region. */
16674 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
16676 /* Avoid creating of loop-carried dependencies through
16677 using topological ordering in the region. */
16678 if (rgn
== CONTAINING_RGN (e
->src
->index
)
16679 && BLOCK_TO_BB (bb
->index
) > BLOCK_TO_BB (e
->src
->index
))
16680 add_dependee_for_func_arg (first_arg
, e
->src
);
16688 else if (first_arg
)
16689 avoid_func_arg_motion (first_arg
, insn
);
16692 /* Hook for pre-reload schedule - set priority of moves from likely spilled
16693 HW registers to maximum, to schedule them at soon as possible. These are
16694 moves from function argument registers at the top of the function entry
16695 and moves from function return value registers after call. */
16697 ix86_adjust_priority (rtx_insn
*insn
, int priority
)
16701 if (reload_completed
)
16704 if (!NONDEBUG_INSN_P (insn
))
16707 set
= single_set (insn
);
16710 rtx tmp
= SET_SRC (set
);
16712 && HARD_REGISTER_P (tmp
)
16713 && !TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (tmp
))
16714 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp
))))
16715 return current_sched_info
->sched_max_insns_priority
;
16721 /* Prepare for scheduling pass. */
16723 ix86_sched_init_global (FILE *, int, int)
16725 /* Install scheduling hooks for current CPU. Some of these hooks are used
16726 in time-critical parts of the scheduler, so we only set them up when
16727 they are actually used. */
16730 case PROCESSOR_CORE2
:
16731 case PROCESSOR_NEHALEM
:
16732 case PROCESSOR_SANDYBRIDGE
:
16733 case PROCESSOR_HASWELL
:
16734 case PROCESSOR_GENERIC
:
16735 /* Do not perform multipass scheduling for pre-reload schedule
16736 to save compile time. */
16737 if (reload_completed
)
16739 ix86_core2i7_init_hooks ();
16742 /* Fall through. */
16744 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
16745 targetm
.sched
.first_cycle_multipass_init
= NULL
;
16746 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
16747 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
16748 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
16749 targetm
.sched
.first_cycle_multipass_end
= NULL
;
16750 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
16756 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
16758 static HOST_WIDE_INT
16759 ix86_static_rtx_alignment (machine_mode mode
)
16761 if (mode
== DFmode
)
16763 if (ALIGN_MODE_128 (mode
))
16764 return MAX (128, GET_MODE_ALIGNMENT (mode
));
16765 return GET_MODE_ALIGNMENT (mode
);
16768 /* Implement TARGET_CONSTANT_ALIGNMENT. */
16770 static HOST_WIDE_INT
16771 ix86_constant_alignment (const_tree exp
, HOST_WIDE_INT align
)
16773 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
16774 || TREE_CODE (exp
) == INTEGER_CST
)
16776 machine_mode mode
= TYPE_MODE (TREE_TYPE (exp
));
16777 HOST_WIDE_INT mode_align
= ix86_static_rtx_alignment (mode
);
16778 return MAX (mode_align
, align
);
16780 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
16781 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
16782 return BITS_PER_WORD
;
16787 /* Implement TARGET_EMPTY_RECORD_P. */
16790 ix86_is_empty_record (const_tree type
)
16794 return default_is_empty_record (type
);
16797 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
16800 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v
, tree type
)
16802 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
16804 if (!cum
->warn_empty
)
16807 if (!TYPE_EMPTY_P (type
))
16810 /* Don't warn if the function isn't visible outside of the TU. */
16811 if (cum
->decl
&& !TREE_PUBLIC (cum
->decl
))
16814 const_tree ctx
= get_ultimate_context (cum
->decl
);
16815 if (ctx
!= NULL_TREE
16816 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx
))
16819 /* If the actual size of the type is zero, then there is no change
16820 in how objects of this size are passed. */
16821 if (int_size_in_bytes (type
) == 0)
16824 warning (OPT_Wabi
, "empty class %qT parameter passing ABI "
16825 "changes in %<-fabi-version=12%> (GCC 8)", type
);
16827 /* Only warn once. */
16828 cum
->warn_empty
= false;
16831 /* This hook returns name of multilib ABI. */
16833 static const char *
16834 ix86_get_multilib_abi_name (void)
16836 if (!(TARGET_64BIT_P (ix86_isa_flags
)))
16838 else if (TARGET_X32_P (ix86_isa_flags
))
16844 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
16845 the data type, and ALIGN is the alignment that the object would
16846 ordinarily have. */
16849 iamcu_alignment (tree type
, int align
)
16853 if (align
< 32 || TYPE_USER_ALIGN (type
))
16856 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
16858 type
= strip_array_types (type
);
16859 if (TYPE_ATOMIC (type
))
16862 mode
= TYPE_MODE (type
);
16863 switch (GET_MODE_CLASS (mode
))
16866 case MODE_COMPLEX_INT
:
16867 case MODE_COMPLEX_FLOAT
:
16869 case MODE_DECIMAL_FLOAT
:
16876 /* Compute the alignment for a static variable.
16877 TYPE is the data type, and ALIGN is the alignment that
16878 the object would ordinarily have. The value of this function is used
16879 instead of that alignment to align the object. */
16882 ix86_data_alignment (tree type
, unsigned int align
, bool opt
)
16884 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
16885 for symbols from other compilation units or symbols that don't need
16886 to bind locally. In order to preserve some ABI compatibility with
16887 those compilers, ensure we don't decrease alignment from what we
16890 unsigned int max_align_compat
= MIN (256, MAX_OFILE_ALIGNMENT
);
16892 /* A data structure, equal or greater than the size of a cache line
16893 (64 bytes in the Pentium 4 and other recent Intel processors, including
16894 processors based on Intel Core microarchitecture) should be aligned
16895 so that its base address is a multiple of a cache line size. */
16897 unsigned int max_align
16898 = MIN ((unsigned) ix86_tune_cost
->prefetch_block
* 8, MAX_OFILE_ALIGNMENT
);
16900 if (max_align
< BITS_PER_WORD
)
16901 max_align
= BITS_PER_WORD
;
16903 switch (ix86_align_data_type
)
16905 case ix86_align_data_type_abi
: opt
= false; break;
16906 case ix86_align_data_type_compat
: max_align
= BITS_PER_WORD
; break;
16907 case ix86_align_data_type_cacheline
: break;
16911 align
= iamcu_alignment (type
, align
);
16914 && AGGREGATE_TYPE_P (type
)
16915 && TYPE_SIZE (type
)
16916 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
)
16918 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type
)), max_align_compat
)
16919 && align
< max_align_compat
)
16920 align
= max_align_compat
;
16921 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type
)), max_align
)
16922 && align
< max_align
)
16926 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16927 to 16byte boundary. */
16930 if ((opt
? AGGREGATE_TYPE_P (type
) : TREE_CODE (type
) == ARRAY_TYPE
)
16931 && TYPE_SIZE (type
)
16932 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16933 && wi::geu_p (wi::to_wide (TYPE_SIZE (type
)), 128)
16941 if (TREE_CODE (type
) == ARRAY_TYPE
)
16943 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
16945 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
16948 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
16951 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
16953 if ((TYPE_MODE (type
) == XCmode
16954 || TYPE_MODE (type
) == TCmode
) && align
< 128)
16957 else if ((TREE_CODE (type
) == RECORD_TYPE
16958 || TREE_CODE (type
) == UNION_TYPE
16959 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
16960 && TYPE_FIELDS (type
))
16962 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
16964 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
16967 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
16968 || TREE_CODE (type
) == INTEGER_TYPE
)
16970 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
16972 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
16979 /* Implememnt TARGET_LOWER_LOCAL_DECL_ALIGNMENT. */
16981 ix86_lower_local_decl_alignment (tree decl
)
16983 unsigned int new_align
= ix86_local_alignment (decl
, VOIDmode
,
16984 DECL_ALIGN (decl
), true);
16985 if (new_align
< DECL_ALIGN (decl
))
16986 SET_DECL_ALIGN (decl
, new_align
);
16989 /* Compute the alignment for a local variable or a stack slot. EXP is
16990 the data type or decl itself, MODE is the widest mode available and
16991 ALIGN is the alignment that the object would ordinarily have. The
16992 value of this macro is used instead of that alignment to align the
16996 ix86_local_alignment (tree exp
, machine_mode mode
,
16997 unsigned int align
, bool may_lower
)
17001 if (exp
&& DECL_P (exp
))
17003 type
= TREE_TYPE (exp
);
17012 /* Don't do dynamic stack realignment for long long objects with
17013 -mpreferred-stack-boundary=2. */
17017 && ix86_preferred_stack_boundary
< 64
17018 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
17019 && (!type
|| (!TYPE_USER_ALIGN (type
)
17020 && !TYPE_ATOMIC (strip_array_types (type
))))
17021 && (!decl
|| !DECL_USER_ALIGN (decl
)))
17024 /* If TYPE is NULL, we are allocating a stack slot for caller-save
17025 register in MODE. We will return the largest alignment of XF
17029 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
17030 align
= GET_MODE_ALIGNMENT (DFmode
);
17034 /* Don't increase alignment for Intel MCU psABI. */
17038 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
17039 to 16byte boundary. Exact wording is:
17041 An array uses the same alignment as its elements, except that a local or
17042 global array variable of length at least 16 bytes or
17043 a C99 variable-length array variable always has alignment of at least 16 bytes.
17045 This was added to allow use of aligned SSE instructions at arrays. This
17046 rule is meant for static storage (where compiler cannot do the analysis
17047 by itself). We follow it for automatic variables only when convenient.
17048 We fully control everything in the function compiled and functions from
17049 other unit cannot rely on the alignment.
17051 Exclude va_list type. It is the common case of local array where
17052 we cannot benefit from the alignment.
17054 TODO: Probably one should optimize for size only when var is not escaping. */
17055 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
17058 if (AGGREGATE_TYPE_P (type
)
17059 && (va_list_type_node
== NULL_TREE
17060 || (TYPE_MAIN_VARIANT (type
)
17061 != TYPE_MAIN_VARIANT (va_list_type_node
)))
17062 && TYPE_SIZE (type
)
17063 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
17064 && wi::geu_p (wi::to_wide (TYPE_SIZE (type
)), 128)
17068 if (TREE_CODE (type
) == ARRAY_TYPE
)
17070 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
17072 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
17075 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
17077 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
17079 if ((TYPE_MODE (type
) == XCmode
17080 || TYPE_MODE (type
) == TCmode
) && align
< 128)
17083 else if ((TREE_CODE (type
) == RECORD_TYPE
17084 || TREE_CODE (type
) == UNION_TYPE
17085 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
17086 && TYPE_FIELDS (type
))
17088 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
17090 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
17093 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
17094 || TREE_CODE (type
) == INTEGER_TYPE
)
17097 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
17099 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
17105 /* Compute the minimum required alignment for dynamic stack realignment
17106 purposes for a local variable, parameter or a stack slot. EXP is
17107 the data type or decl itself, MODE is its mode and ALIGN is the
17108 alignment that the object would ordinarily have. */
17111 ix86_minimum_alignment (tree exp
, machine_mode mode
,
17112 unsigned int align
)
17116 if (exp
&& DECL_P (exp
))
17118 type
= TREE_TYPE (exp
);
17127 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
17130 /* Don't do dynamic stack realignment for long long objects with
17131 -mpreferred-stack-boundary=2. */
17132 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
17133 && (!type
|| (!TYPE_USER_ALIGN (type
)
17134 && !TYPE_ATOMIC (strip_array_types (type
))))
17135 && (!decl
|| !DECL_USER_ALIGN (decl
)))
17137 gcc_checking_assert (!TARGET_STV
);
17144 /* Find a location for the static chain incoming to a nested function.
17145 This is a register, unless all free registers are used by arguments. */
17148 ix86_static_chain (const_tree fndecl_or_type
, bool incoming_p
)
17154 /* We always use R10 in 64-bit mode. */
17159 const_tree fntype
, fndecl
;
17162 /* By default in 32-bit mode we use ECX to pass the static chain. */
17165 if (TREE_CODE (fndecl_or_type
) == FUNCTION_DECL
)
17167 fntype
= TREE_TYPE (fndecl_or_type
);
17168 fndecl
= fndecl_or_type
;
17172 fntype
= fndecl_or_type
;
17176 ccvt
= ix86_get_callcvt (fntype
);
17177 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
17179 /* Fastcall functions use ecx/edx for arguments, which leaves
17180 us with EAX for the static chain.
17181 Thiscall functions use ecx for arguments, which also
17182 leaves us with EAX for the static chain. */
17185 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
17187 /* Thiscall functions use ecx for arguments, which leaves
17188 us with EAX and EDX for the static chain.
17189 We are using for abi-compatibility EAX. */
17192 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
17194 /* For regparm 3, we have no free call-clobbered registers in
17195 which to store the static chain. In order to implement this,
17196 we have the trampoline push the static chain to the stack.
17197 However, we can't push a value below the return address when
17198 we call the nested function directly, so we have to use an
17199 alternate entry point. For this we use ESI, and have the
17200 alternate entry point push ESI, so that things appear the
17201 same once we're executing the nested function. */
17204 if (fndecl
== current_function_decl
17205 && !ix86_static_chain_on_stack
)
17207 gcc_assert (!reload_completed
);
17208 ix86_static_chain_on_stack
= true;
17210 return gen_frame_mem (SImode
,
17211 plus_constant (Pmode
,
17212 arg_pointer_rtx
, -8));
17218 return gen_rtx_REG (Pmode
, regno
);
17221 /* Emit RTL insns to initialize the variable parts of a trampoline.
17222 FNDECL is the decl of the target address; M_TRAMP is a MEM for
17223 the trampoline, and CHAIN_VALUE is an RTX for the static chain
17224 to be passed to the target function. */
17227 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
17232 bool need_endbr
= (flag_cf_protection
& CF_BRANCH
);
17234 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
17242 /* Insert ENDBR64. */
17243 mem
= adjust_address (m_tramp
, SImode
, offset
);
17244 emit_move_insn (mem
, gen_int_mode (0xfa1e0ff3, SImode
));
17248 /* Load the function address to r11. Try to load address using
17249 the shorter movl instead of movabs. We may want to support
17250 movq for kernel mode, but kernel does not use trampolines at
17251 the moment. FNADDR is a 32bit address and may not be in
17252 DImode when ptr_mode == SImode. Always use movl in this
17254 if (ptr_mode
== SImode
17255 || x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
17257 fnaddr
= copy_addr_to_reg (fnaddr
);
17259 mem
= adjust_address (m_tramp
, HImode
, offset
);
17260 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
17262 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
17263 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
17268 mem
= adjust_address (m_tramp
, HImode
, offset
);
17269 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
17271 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
17272 emit_move_insn (mem
, fnaddr
);
17276 /* Load static chain using movabs to r10. Use the shorter movl
17277 instead of movabs when ptr_mode == SImode. */
17278 if (ptr_mode
== SImode
)
17289 mem
= adjust_address (m_tramp
, HImode
, offset
);
17290 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
17292 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
17293 emit_move_insn (mem
, chain_value
);
17296 /* Jump to r11; the last (unused) byte is a nop, only there to
17297 pad the write out to a single 32-bit store. */
17298 mem
= adjust_address (m_tramp
, SImode
, offset
);
17299 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
17306 /* Depending on the static chain location, either load a register
17307 with a constant, or push the constant to the stack. All of the
17308 instructions are the same size. */
17309 chain
= ix86_static_chain (fndecl
, true);
17312 switch (REGNO (chain
))
17315 opcode
= 0xb8; break;
17317 opcode
= 0xb9; break;
17319 gcc_unreachable ();
17327 /* Insert ENDBR32. */
17328 mem
= adjust_address (m_tramp
, SImode
, offset
);
17329 emit_move_insn (mem
, gen_int_mode (0xfb1e0ff3, SImode
));
17333 mem
= adjust_address (m_tramp
, QImode
, offset
);
17334 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
17336 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
17337 emit_move_insn (mem
, chain_value
);
17340 mem
= adjust_address (m_tramp
, QImode
, offset
);
17341 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
17343 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
17345 /* Compute offset from the end of the jmp to the target function.
17346 In the case in which the trampoline stores the static chain on
17347 the stack, we need to skip the first insn which pushes the
17348 (call-saved) register static chain; this push is 1 byte. */
17350 int skip
= MEM_P (chain
) ? 1 : 0;
17351 /* Skip ENDBR32 at the entry of the target function. */
17353 && !cgraph_node::get (fndecl
)->only_called_directly_p ())
17355 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
17356 plus_constant (Pmode
, XEXP (m_tramp
, 0),
17358 NULL_RTX
, 1, OPTAB_DIRECT
);
17359 emit_move_insn (mem
, disp
);
17362 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
17364 #ifdef HAVE_ENABLE_EXECUTE_STACK
17365 #ifdef CHECK_EXECUTE_STACK_ENABLED
17366 if (CHECK_EXECUTE_STACK_ENABLED
)
17368 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
17369 LCT_NORMAL
, VOIDmode
, XEXP (m_tramp
, 0), Pmode
);
17374 ix86_allocate_stack_slots_for_args (void)
17376 /* Naked functions should not allocate stack slots for arguments. */
17377 return !ix86_function_naked (current_function_decl
);
17381 ix86_warn_func_return (tree decl
)
17383 /* Naked functions are implemented entirely in assembly, including the
17384 return sequence, so suppress warnings about this. */
17385 return !ix86_function_naked (decl
);
17388 /* Return the shift count of a vector by scalar shift builtin second argument
17391 ix86_vector_shift_count (tree arg1
)
17393 if (tree_fits_uhwi_p (arg1
))
17395 else if (TREE_CODE (arg1
) == VECTOR_CST
&& CHAR_BIT
== 8)
17397 /* The count argument is weird, passed in as various 128-bit
17398 (or 64-bit) vectors, the low 64 bits from it are the count. */
17399 unsigned char buf
[16];
17400 int len
= native_encode_expr (arg1
, buf
, 16);
17403 tree t
= native_interpret_expr (uint64_type_node
, buf
, len
);
17404 if (t
&& tree_fits_uhwi_p (t
))
17411 ix86_fold_builtin (tree fndecl
, int n_args
,
17412 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
17414 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
17416 enum ix86_builtins fn_code
17417 = (enum ix86_builtins
) DECL_MD_FUNCTION_CODE (fndecl
);
17418 enum rtx_code rcode
;
17420 unsigned HOST_WIDE_INT mask
;
17424 case IX86_BUILTIN_CPU_IS
:
17425 case IX86_BUILTIN_CPU_SUPPORTS
:
17426 gcc_assert (n_args
== 1);
17427 return fold_builtin_cpu (fndecl
, args
);
17429 case IX86_BUILTIN_NANQ
:
17430 case IX86_BUILTIN_NANSQ
:
17432 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
17433 const char *str
= c_getstr (*args
);
17434 int quiet
= fn_code
== IX86_BUILTIN_NANQ
;
17435 REAL_VALUE_TYPE real
;
17437 if (str
&& real_nan (&real
, str
, quiet
, TYPE_MODE (type
)))
17438 return build_real (type
, real
);
17442 case IX86_BUILTIN_INFQ
:
17443 case IX86_BUILTIN_HUGE_VALQ
:
17445 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
17446 REAL_VALUE_TYPE inf
;
17448 return build_real (type
, inf
);
17451 case IX86_BUILTIN_TZCNT16
:
17452 case IX86_BUILTIN_CTZS
:
17453 case IX86_BUILTIN_TZCNT32
:
17454 case IX86_BUILTIN_TZCNT64
:
17455 gcc_assert (n_args
== 1);
17456 if (TREE_CODE (args
[0]) == INTEGER_CST
)
17458 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
17459 tree arg
= args
[0];
17460 if (fn_code
== IX86_BUILTIN_TZCNT16
17461 || fn_code
== IX86_BUILTIN_CTZS
)
17462 arg
= fold_convert (short_unsigned_type_node
, arg
);
17463 if (integer_zerop (arg
))
17464 return build_int_cst (type
, TYPE_PRECISION (TREE_TYPE (arg
)));
17466 return fold_const_call (CFN_CTZ
, type
, arg
);
17470 case IX86_BUILTIN_LZCNT16
:
17471 case IX86_BUILTIN_CLZS
:
17472 case IX86_BUILTIN_LZCNT32
:
17473 case IX86_BUILTIN_LZCNT64
:
17474 gcc_assert (n_args
== 1);
17475 if (TREE_CODE (args
[0]) == INTEGER_CST
)
17477 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
17478 tree arg
= args
[0];
17479 if (fn_code
== IX86_BUILTIN_LZCNT16
17480 || fn_code
== IX86_BUILTIN_CLZS
)
17481 arg
= fold_convert (short_unsigned_type_node
, arg
);
17482 if (integer_zerop (arg
))
17483 return build_int_cst (type
, TYPE_PRECISION (TREE_TYPE (arg
)));
17485 return fold_const_call (CFN_CLZ
, type
, arg
);
17489 case IX86_BUILTIN_BEXTR32
:
17490 case IX86_BUILTIN_BEXTR64
:
17491 case IX86_BUILTIN_BEXTRI32
:
17492 case IX86_BUILTIN_BEXTRI64
:
17493 gcc_assert (n_args
== 2);
17494 if (tree_fits_uhwi_p (args
[1]))
17496 unsigned HOST_WIDE_INT res
= 0;
17497 unsigned int prec
= TYPE_PRECISION (TREE_TYPE (args
[0]));
17498 unsigned int start
= tree_to_uhwi (args
[1]);
17499 unsigned int len
= (start
& 0xff00) >> 8;
17501 if (start
>= prec
|| len
== 0)
17503 else if (!tree_fits_uhwi_p (args
[0]))
17506 res
= tree_to_uhwi (args
[0]) >> start
;
17509 if (len
< HOST_BITS_PER_WIDE_INT
)
17510 res
&= (HOST_WIDE_INT_1U
<< len
) - 1;
17511 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl
)), res
);
17515 case IX86_BUILTIN_BZHI32
:
17516 case IX86_BUILTIN_BZHI64
:
17517 gcc_assert (n_args
== 2);
17518 if (tree_fits_uhwi_p (args
[1]))
17520 unsigned int idx
= tree_to_uhwi (args
[1]) & 0xff;
17521 if (idx
>= TYPE_PRECISION (TREE_TYPE (args
[0])))
17524 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl
)), 0);
17525 if (!tree_fits_uhwi_p (args
[0]))
17527 unsigned HOST_WIDE_INT res
= tree_to_uhwi (args
[0]);
17528 res
&= ~(HOST_WIDE_INT_M1U
<< idx
);
17529 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl
)), res
);
17533 case IX86_BUILTIN_PDEP32
:
17534 case IX86_BUILTIN_PDEP64
:
17535 gcc_assert (n_args
== 2);
17536 if (tree_fits_uhwi_p (args
[0]) && tree_fits_uhwi_p (args
[1]))
17538 unsigned HOST_WIDE_INT src
= tree_to_uhwi (args
[0]);
17539 unsigned HOST_WIDE_INT mask
= tree_to_uhwi (args
[1]);
17540 unsigned HOST_WIDE_INT res
= 0;
17541 unsigned HOST_WIDE_INT m
, k
= 1;
17542 for (m
= 1; m
; m
<<= 1)
17543 if ((mask
& m
) != 0)
17545 if ((src
& k
) != 0)
17549 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl
)), res
);
17553 case IX86_BUILTIN_PEXT32
:
17554 case IX86_BUILTIN_PEXT64
:
17555 gcc_assert (n_args
== 2);
17556 if (tree_fits_uhwi_p (args
[0]) && tree_fits_uhwi_p (args
[1]))
17558 unsigned HOST_WIDE_INT src
= tree_to_uhwi (args
[0]);
17559 unsigned HOST_WIDE_INT mask
= tree_to_uhwi (args
[1]);
17560 unsigned HOST_WIDE_INT res
= 0;
17561 unsigned HOST_WIDE_INT m
, k
= 1;
17562 for (m
= 1; m
; m
<<= 1)
17563 if ((mask
& m
) != 0)
17565 if ((src
& m
) != 0)
17569 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl
)), res
);
17573 case IX86_BUILTIN_MOVMSKPS
:
17574 case IX86_BUILTIN_PMOVMSKB
:
17575 case IX86_BUILTIN_MOVMSKPD
:
17576 case IX86_BUILTIN_PMOVMSKB128
:
17577 case IX86_BUILTIN_MOVMSKPD256
:
17578 case IX86_BUILTIN_MOVMSKPS256
:
17579 case IX86_BUILTIN_PMOVMSKB256
:
17580 gcc_assert (n_args
== 1);
17581 if (TREE_CODE (args
[0]) == VECTOR_CST
)
17583 HOST_WIDE_INT res
= 0;
17584 for (unsigned i
= 0; i
< VECTOR_CST_NELTS (args
[0]); ++i
)
17586 tree e
= VECTOR_CST_ELT (args
[0], i
);
17587 if (TREE_CODE (e
) == INTEGER_CST
&& !TREE_OVERFLOW (e
))
17589 if (wi::neg_p (wi::to_wide (e
)))
17590 res
|= HOST_WIDE_INT_1
<< i
;
17592 else if (TREE_CODE (e
) == REAL_CST
&& !TREE_OVERFLOW (e
))
17594 if (TREE_REAL_CST (e
).sign
)
17595 res
|= HOST_WIDE_INT_1
<< i
;
17600 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl
)), res
);
17604 case IX86_BUILTIN_PSLLD
:
17605 case IX86_BUILTIN_PSLLD128
:
17606 case IX86_BUILTIN_PSLLD128_MASK
:
17607 case IX86_BUILTIN_PSLLD256
:
17608 case IX86_BUILTIN_PSLLD256_MASK
:
17609 case IX86_BUILTIN_PSLLD512
:
17610 case IX86_BUILTIN_PSLLDI
:
17611 case IX86_BUILTIN_PSLLDI128
:
17612 case IX86_BUILTIN_PSLLDI128_MASK
:
17613 case IX86_BUILTIN_PSLLDI256
:
17614 case IX86_BUILTIN_PSLLDI256_MASK
:
17615 case IX86_BUILTIN_PSLLDI512
:
17616 case IX86_BUILTIN_PSLLQ
:
17617 case IX86_BUILTIN_PSLLQ128
:
17618 case IX86_BUILTIN_PSLLQ128_MASK
:
17619 case IX86_BUILTIN_PSLLQ256
:
17620 case IX86_BUILTIN_PSLLQ256_MASK
:
17621 case IX86_BUILTIN_PSLLQ512
:
17622 case IX86_BUILTIN_PSLLQI
:
17623 case IX86_BUILTIN_PSLLQI128
:
17624 case IX86_BUILTIN_PSLLQI128_MASK
:
17625 case IX86_BUILTIN_PSLLQI256
:
17626 case IX86_BUILTIN_PSLLQI256_MASK
:
17627 case IX86_BUILTIN_PSLLQI512
:
17628 case IX86_BUILTIN_PSLLW
:
17629 case IX86_BUILTIN_PSLLW128
:
17630 case IX86_BUILTIN_PSLLW128_MASK
:
17631 case IX86_BUILTIN_PSLLW256
:
17632 case IX86_BUILTIN_PSLLW256_MASK
:
17633 case IX86_BUILTIN_PSLLW512_MASK
:
17634 case IX86_BUILTIN_PSLLWI
:
17635 case IX86_BUILTIN_PSLLWI128
:
17636 case IX86_BUILTIN_PSLLWI128_MASK
:
17637 case IX86_BUILTIN_PSLLWI256
:
17638 case IX86_BUILTIN_PSLLWI256_MASK
:
17639 case IX86_BUILTIN_PSLLWI512_MASK
:
17643 case IX86_BUILTIN_PSRAD
:
17644 case IX86_BUILTIN_PSRAD128
:
17645 case IX86_BUILTIN_PSRAD128_MASK
:
17646 case IX86_BUILTIN_PSRAD256
:
17647 case IX86_BUILTIN_PSRAD256_MASK
:
17648 case IX86_BUILTIN_PSRAD512
:
17649 case IX86_BUILTIN_PSRADI
:
17650 case IX86_BUILTIN_PSRADI128
:
17651 case IX86_BUILTIN_PSRADI128_MASK
:
17652 case IX86_BUILTIN_PSRADI256
:
17653 case IX86_BUILTIN_PSRADI256_MASK
:
17654 case IX86_BUILTIN_PSRADI512
:
17655 case IX86_BUILTIN_PSRAQ128_MASK
:
17656 case IX86_BUILTIN_PSRAQ256_MASK
:
17657 case IX86_BUILTIN_PSRAQ512
:
17658 case IX86_BUILTIN_PSRAQI128_MASK
:
17659 case IX86_BUILTIN_PSRAQI256_MASK
:
17660 case IX86_BUILTIN_PSRAQI512
:
17661 case IX86_BUILTIN_PSRAW
:
17662 case IX86_BUILTIN_PSRAW128
:
17663 case IX86_BUILTIN_PSRAW128_MASK
:
17664 case IX86_BUILTIN_PSRAW256
:
17665 case IX86_BUILTIN_PSRAW256_MASK
:
17666 case IX86_BUILTIN_PSRAW512
:
17667 case IX86_BUILTIN_PSRAWI
:
17668 case IX86_BUILTIN_PSRAWI128
:
17669 case IX86_BUILTIN_PSRAWI128_MASK
:
17670 case IX86_BUILTIN_PSRAWI256
:
17671 case IX86_BUILTIN_PSRAWI256_MASK
:
17672 case IX86_BUILTIN_PSRAWI512
:
17676 case IX86_BUILTIN_PSRLD
:
17677 case IX86_BUILTIN_PSRLD128
:
17678 case IX86_BUILTIN_PSRLD128_MASK
:
17679 case IX86_BUILTIN_PSRLD256
:
17680 case IX86_BUILTIN_PSRLD256_MASK
:
17681 case IX86_BUILTIN_PSRLD512
:
17682 case IX86_BUILTIN_PSRLDI
:
17683 case IX86_BUILTIN_PSRLDI128
:
17684 case IX86_BUILTIN_PSRLDI128_MASK
:
17685 case IX86_BUILTIN_PSRLDI256
:
17686 case IX86_BUILTIN_PSRLDI256_MASK
:
17687 case IX86_BUILTIN_PSRLDI512
:
17688 case IX86_BUILTIN_PSRLQ
:
17689 case IX86_BUILTIN_PSRLQ128
:
17690 case IX86_BUILTIN_PSRLQ128_MASK
:
17691 case IX86_BUILTIN_PSRLQ256
:
17692 case IX86_BUILTIN_PSRLQ256_MASK
:
17693 case IX86_BUILTIN_PSRLQ512
:
17694 case IX86_BUILTIN_PSRLQI
:
17695 case IX86_BUILTIN_PSRLQI128
:
17696 case IX86_BUILTIN_PSRLQI128_MASK
:
17697 case IX86_BUILTIN_PSRLQI256
:
17698 case IX86_BUILTIN_PSRLQI256_MASK
:
17699 case IX86_BUILTIN_PSRLQI512
:
17700 case IX86_BUILTIN_PSRLW
:
17701 case IX86_BUILTIN_PSRLW128
:
17702 case IX86_BUILTIN_PSRLW128_MASK
:
17703 case IX86_BUILTIN_PSRLW256
:
17704 case IX86_BUILTIN_PSRLW256_MASK
:
17705 case IX86_BUILTIN_PSRLW512
:
17706 case IX86_BUILTIN_PSRLWI
:
17707 case IX86_BUILTIN_PSRLWI128
:
17708 case IX86_BUILTIN_PSRLWI128_MASK
:
17709 case IX86_BUILTIN_PSRLWI256
:
17710 case IX86_BUILTIN_PSRLWI256_MASK
:
17711 case IX86_BUILTIN_PSRLWI512
:
17715 case IX86_BUILTIN_PSLLVV16HI
:
17716 case IX86_BUILTIN_PSLLVV16SI
:
17717 case IX86_BUILTIN_PSLLVV2DI
:
17718 case IX86_BUILTIN_PSLLVV2DI_MASK
:
17719 case IX86_BUILTIN_PSLLVV32HI
:
17720 case IX86_BUILTIN_PSLLVV4DI
:
17721 case IX86_BUILTIN_PSLLVV4DI_MASK
:
17722 case IX86_BUILTIN_PSLLVV4SI
:
17723 case IX86_BUILTIN_PSLLVV4SI_MASK
:
17724 case IX86_BUILTIN_PSLLVV8DI
:
17725 case IX86_BUILTIN_PSLLVV8HI
:
17726 case IX86_BUILTIN_PSLLVV8SI
:
17727 case IX86_BUILTIN_PSLLVV8SI_MASK
:
17731 case IX86_BUILTIN_PSRAVQ128
:
17732 case IX86_BUILTIN_PSRAVQ256
:
17733 case IX86_BUILTIN_PSRAVV16HI
:
17734 case IX86_BUILTIN_PSRAVV16SI
:
17735 case IX86_BUILTIN_PSRAVV32HI
:
17736 case IX86_BUILTIN_PSRAVV4SI
:
17737 case IX86_BUILTIN_PSRAVV4SI_MASK
:
17738 case IX86_BUILTIN_PSRAVV8DI
:
17739 case IX86_BUILTIN_PSRAVV8HI
:
17740 case IX86_BUILTIN_PSRAVV8SI
:
17741 case IX86_BUILTIN_PSRAVV8SI_MASK
:
17745 case IX86_BUILTIN_PSRLVV16HI
:
17746 case IX86_BUILTIN_PSRLVV16SI
:
17747 case IX86_BUILTIN_PSRLVV2DI
:
17748 case IX86_BUILTIN_PSRLVV2DI_MASK
:
17749 case IX86_BUILTIN_PSRLVV32HI
:
17750 case IX86_BUILTIN_PSRLVV4DI
:
17751 case IX86_BUILTIN_PSRLVV4DI_MASK
:
17752 case IX86_BUILTIN_PSRLVV4SI
:
17753 case IX86_BUILTIN_PSRLVV4SI_MASK
:
17754 case IX86_BUILTIN_PSRLVV8DI
:
17755 case IX86_BUILTIN_PSRLVV8HI
:
17756 case IX86_BUILTIN_PSRLVV8SI
:
17757 case IX86_BUILTIN_PSRLVV8SI_MASK
:
17763 gcc_assert (n_args
>= 2);
17764 if (TREE_CODE (args
[0]) != VECTOR_CST
)
17766 mask
= HOST_WIDE_INT_M1U
;
17769 /* This is masked shift. */
17770 if (!tree_fits_uhwi_p (args
[n_args
- 1])
17771 || TREE_SIDE_EFFECTS (args
[n_args
- 2]))
17773 mask
= tree_to_uhwi (args
[n_args
- 1]);
17774 unsigned elems
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (args
[0]));
17775 mask
|= HOST_WIDE_INT_M1U
<< elems
;
17776 if (mask
!= HOST_WIDE_INT_M1U
17777 && TREE_CODE (args
[n_args
- 2]) != VECTOR_CST
)
17779 if (mask
== (HOST_WIDE_INT_M1U
<< elems
))
17780 return args
[n_args
- 2];
17782 if (is_vshift
&& TREE_CODE (args
[1]) != VECTOR_CST
)
17784 if (tree tem
= (is_vshift
? integer_one_node
17785 : ix86_vector_shift_count (args
[1])))
17787 unsigned HOST_WIDE_INT count
= tree_to_uhwi (tem
);
17788 unsigned HOST_WIDE_INT prec
17789 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args
[0])));
17790 if (count
== 0 && mask
== HOST_WIDE_INT_M1U
)
17794 if (rcode
== ASHIFTRT
)
17796 else if (mask
== HOST_WIDE_INT_M1U
)
17797 return build_zero_cst (TREE_TYPE (args
[0]));
17799 tree countt
= NULL_TREE
;
17803 countt
= integer_zero_node
;
17805 countt
= build_int_cst (integer_type_node
, count
);
17807 tree_vector_builder builder
;
17808 if (mask
!= HOST_WIDE_INT_M1U
|| is_vshift
)
17809 builder
.new_vector (TREE_TYPE (args
[0]),
17810 TYPE_VECTOR_SUBPARTS (TREE_TYPE (args
[0])),
17813 builder
.new_unary_operation (TREE_TYPE (args
[0]), args
[0],
17815 unsigned int cnt
= builder
.encoded_nelts ();
17816 for (unsigned int i
= 0; i
< cnt
; ++i
)
17818 tree elt
= VECTOR_CST_ELT (args
[0], i
);
17819 if (TREE_CODE (elt
) != INTEGER_CST
|| TREE_OVERFLOW (elt
))
17821 tree type
= TREE_TYPE (elt
);
17822 if (rcode
== LSHIFTRT
)
17823 elt
= fold_convert (unsigned_type_for (type
), elt
);
17826 countt
= VECTOR_CST_ELT (args
[1], i
);
17827 if (TREE_CODE (countt
) != INTEGER_CST
17828 || TREE_OVERFLOW (countt
))
17830 if (wi::neg_p (wi::to_wide (countt
))
17831 || wi::to_widest (countt
) >= prec
)
17833 if (rcode
== ASHIFTRT
)
17834 countt
= build_int_cst (TREE_TYPE (countt
),
17838 elt
= build_zero_cst (TREE_TYPE (elt
));
17839 countt
= build_zero_cst (TREE_TYPE (countt
));
17843 else if (count
>= prec
)
17844 elt
= build_zero_cst (TREE_TYPE (elt
));
17845 elt
= const_binop (rcode
== ASHIFT
17846 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
17847 TREE_TYPE (elt
), elt
, countt
);
17848 if (!elt
|| TREE_CODE (elt
) != INTEGER_CST
)
17850 if (rcode
== LSHIFTRT
)
17851 elt
= fold_convert (type
, elt
);
17852 if ((mask
& (HOST_WIDE_INT_1U
<< i
)) == 0)
17854 elt
= VECTOR_CST_ELT (args
[n_args
- 2], i
);
17855 if (TREE_CODE (elt
) != INTEGER_CST
17856 || TREE_OVERFLOW (elt
))
17859 builder
.quick_push (elt
);
17861 return builder
.build ();
17870 #ifdef SUBTARGET_FOLD_BUILTIN
17871 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
17877 /* Fold a MD builtin (use ix86_fold_builtin for folding into
17878 constant) in GIMPLE. */
17881 ix86_gimple_fold_builtin (gimple_stmt_iterator
*gsi
)
17883 gimple
*stmt
= gsi_stmt (*gsi
);
17884 tree fndecl
= gimple_call_fndecl (stmt
);
17885 gcc_checking_assert (fndecl
&& fndecl_built_in_p (fndecl
, BUILT_IN_MD
));
17886 int n_args
= gimple_call_num_args (stmt
);
17887 enum ix86_builtins fn_code
17888 = (enum ix86_builtins
) DECL_MD_FUNCTION_CODE (fndecl
);
17889 tree decl
= NULL_TREE
;
17890 tree arg0
, arg1
, arg2
;
17891 enum rtx_code rcode
;
17892 unsigned HOST_WIDE_INT count
;
17897 case IX86_BUILTIN_TZCNT32
:
17898 decl
= builtin_decl_implicit (BUILT_IN_CTZ
);
17899 goto fold_tzcnt_lzcnt
;
17901 case IX86_BUILTIN_TZCNT64
:
17902 decl
= builtin_decl_implicit (BUILT_IN_CTZLL
);
17903 goto fold_tzcnt_lzcnt
;
17905 case IX86_BUILTIN_LZCNT32
:
17906 decl
= builtin_decl_implicit (BUILT_IN_CLZ
);
17907 goto fold_tzcnt_lzcnt
;
17909 case IX86_BUILTIN_LZCNT64
:
17910 decl
= builtin_decl_implicit (BUILT_IN_CLZLL
);
17911 goto fold_tzcnt_lzcnt
;
17914 gcc_assert (n_args
== 1);
17915 arg0
= gimple_call_arg (stmt
, 0);
17916 if (TREE_CODE (arg0
) == SSA_NAME
&& decl
&& gimple_call_lhs (stmt
))
17918 int prec
= TYPE_PRECISION (TREE_TYPE (arg0
));
17919 /* If arg0 is provably non-zero, optimize into generic
17920 __builtin_c[tl]z{,ll} function the middle-end handles
17922 if (!expr_not_equal_to (arg0
, wi::zero (prec
)))
17925 location_t loc
= gimple_location (stmt
);
17926 gimple
*g
= gimple_build_call (decl
, 1, arg0
);
17927 gimple_set_location (g
, loc
);
17928 tree lhs
= make_ssa_name (integer_type_node
);
17929 gimple_call_set_lhs (g
, lhs
);
17930 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
17931 g
= gimple_build_assign (gimple_call_lhs (stmt
), NOP_EXPR
, lhs
);
17932 gimple_set_location (g
, loc
);
17933 gsi_replace (gsi
, g
, false);
17938 case IX86_BUILTIN_BZHI32
:
17939 case IX86_BUILTIN_BZHI64
:
17940 gcc_assert (n_args
== 2);
17941 arg1
= gimple_call_arg (stmt
, 1);
17942 if (tree_fits_uhwi_p (arg1
) && gimple_call_lhs (stmt
))
17944 unsigned int idx
= tree_to_uhwi (arg1
) & 0xff;
17945 arg0
= gimple_call_arg (stmt
, 0);
17946 if (idx
< TYPE_PRECISION (TREE_TYPE (arg0
)))
17948 location_t loc
= gimple_location (stmt
);
17949 gimple
*g
= gimple_build_assign (gimple_call_lhs (stmt
), arg0
);
17950 gimple_set_location (g
, loc
);
17951 gsi_replace (gsi
, g
, false);
17956 case IX86_BUILTIN_PDEP32
:
17957 case IX86_BUILTIN_PDEP64
:
17958 case IX86_BUILTIN_PEXT32
:
17959 case IX86_BUILTIN_PEXT64
:
17960 gcc_assert (n_args
== 2);
17961 arg1
= gimple_call_arg (stmt
, 1);
17962 if (integer_all_onesp (arg1
) && gimple_call_lhs (stmt
))
17964 location_t loc
= gimple_location (stmt
);
17965 arg0
= gimple_call_arg (stmt
, 0);
17966 gimple
*g
= gimple_build_assign (gimple_call_lhs (stmt
), arg0
);
17967 gimple_set_location (g
, loc
);
17968 gsi_replace (gsi
, g
, false);
17973 case IX86_BUILTIN_PSLLD
:
17974 case IX86_BUILTIN_PSLLD128
:
17975 case IX86_BUILTIN_PSLLD128_MASK
:
17976 case IX86_BUILTIN_PSLLD256
:
17977 case IX86_BUILTIN_PSLLD256_MASK
:
17978 case IX86_BUILTIN_PSLLD512
:
17979 case IX86_BUILTIN_PSLLDI
:
17980 case IX86_BUILTIN_PSLLDI128
:
17981 case IX86_BUILTIN_PSLLDI128_MASK
:
17982 case IX86_BUILTIN_PSLLDI256
:
17983 case IX86_BUILTIN_PSLLDI256_MASK
:
17984 case IX86_BUILTIN_PSLLDI512
:
17985 case IX86_BUILTIN_PSLLQ
:
17986 case IX86_BUILTIN_PSLLQ128
:
17987 case IX86_BUILTIN_PSLLQ128_MASK
:
17988 case IX86_BUILTIN_PSLLQ256
:
17989 case IX86_BUILTIN_PSLLQ256_MASK
:
17990 case IX86_BUILTIN_PSLLQ512
:
17991 case IX86_BUILTIN_PSLLQI
:
17992 case IX86_BUILTIN_PSLLQI128
:
17993 case IX86_BUILTIN_PSLLQI128_MASK
:
17994 case IX86_BUILTIN_PSLLQI256
:
17995 case IX86_BUILTIN_PSLLQI256_MASK
:
17996 case IX86_BUILTIN_PSLLQI512
:
17997 case IX86_BUILTIN_PSLLW
:
17998 case IX86_BUILTIN_PSLLW128
:
17999 case IX86_BUILTIN_PSLLW128_MASK
:
18000 case IX86_BUILTIN_PSLLW256
:
18001 case IX86_BUILTIN_PSLLW256_MASK
:
18002 case IX86_BUILTIN_PSLLW512_MASK
:
18003 case IX86_BUILTIN_PSLLWI
:
18004 case IX86_BUILTIN_PSLLWI128
:
18005 case IX86_BUILTIN_PSLLWI128_MASK
:
18006 case IX86_BUILTIN_PSLLWI256
:
18007 case IX86_BUILTIN_PSLLWI256_MASK
:
18008 case IX86_BUILTIN_PSLLWI512_MASK
:
18012 case IX86_BUILTIN_PSRAD
:
18013 case IX86_BUILTIN_PSRAD128
:
18014 case IX86_BUILTIN_PSRAD128_MASK
:
18015 case IX86_BUILTIN_PSRAD256
:
18016 case IX86_BUILTIN_PSRAD256_MASK
:
18017 case IX86_BUILTIN_PSRAD512
:
18018 case IX86_BUILTIN_PSRADI
:
18019 case IX86_BUILTIN_PSRADI128
:
18020 case IX86_BUILTIN_PSRADI128_MASK
:
18021 case IX86_BUILTIN_PSRADI256
:
18022 case IX86_BUILTIN_PSRADI256_MASK
:
18023 case IX86_BUILTIN_PSRADI512
:
18024 case IX86_BUILTIN_PSRAQ128_MASK
:
18025 case IX86_BUILTIN_PSRAQ256_MASK
:
18026 case IX86_BUILTIN_PSRAQ512
:
18027 case IX86_BUILTIN_PSRAQI128_MASK
:
18028 case IX86_BUILTIN_PSRAQI256_MASK
:
18029 case IX86_BUILTIN_PSRAQI512
:
18030 case IX86_BUILTIN_PSRAW
:
18031 case IX86_BUILTIN_PSRAW128
:
18032 case IX86_BUILTIN_PSRAW128_MASK
:
18033 case IX86_BUILTIN_PSRAW256
:
18034 case IX86_BUILTIN_PSRAW256_MASK
:
18035 case IX86_BUILTIN_PSRAW512
:
18036 case IX86_BUILTIN_PSRAWI
:
18037 case IX86_BUILTIN_PSRAWI128
:
18038 case IX86_BUILTIN_PSRAWI128_MASK
:
18039 case IX86_BUILTIN_PSRAWI256
:
18040 case IX86_BUILTIN_PSRAWI256_MASK
:
18041 case IX86_BUILTIN_PSRAWI512
:
18045 case IX86_BUILTIN_PSRLD
:
18046 case IX86_BUILTIN_PSRLD128
:
18047 case IX86_BUILTIN_PSRLD128_MASK
:
18048 case IX86_BUILTIN_PSRLD256
:
18049 case IX86_BUILTIN_PSRLD256_MASK
:
18050 case IX86_BUILTIN_PSRLD512
:
18051 case IX86_BUILTIN_PSRLDI
:
18052 case IX86_BUILTIN_PSRLDI128
:
18053 case IX86_BUILTIN_PSRLDI128_MASK
:
18054 case IX86_BUILTIN_PSRLDI256
:
18055 case IX86_BUILTIN_PSRLDI256_MASK
:
18056 case IX86_BUILTIN_PSRLDI512
:
18057 case IX86_BUILTIN_PSRLQ
:
18058 case IX86_BUILTIN_PSRLQ128
:
18059 case IX86_BUILTIN_PSRLQ128_MASK
:
18060 case IX86_BUILTIN_PSRLQ256
:
18061 case IX86_BUILTIN_PSRLQ256_MASK
:
18062 case IX86_BUILTIN_PSRLQ512
:
18063 case IX86_BUILTIN_PSRLQI
:
18064 case IX86_BUILTIN_PSRLQI128
:
18065 case IX86_BUILTIN_PSRLQI128_MASK
:
18066 case IX86_BUILTIN_PSRLQI256
:
18067 case IX86_BUILTIN_PSRLQI256_MASK
:
18068 case IX86_BUILTIN_PSRLQI512
:
18069 case IX86_BUILTIN_PSRLW
:
18070 case IX86_BUILTIN_PSRLW128
:
18071 case IX86_BUILTIN_PSRLW128_MASK
:
18072 case IX86_BUILTIN_PSRLW256
:
18073 case IX86_BUILTIN_PSRLW256_MASK
:
18074 case IX86_BUILTIN_PSRLW512
:
18075 case IX86_BUILTIN_PSRLWI
:
18076 case IX86_BUILTIN_PSRLWI128
:
18077 case IX86_BUILTIN_PSRLWI128_MASK
:
18078 case IX86_BUILTIN_PSRLWI256
:
18079 case IX86_BUILTIN_PSRLWI256_MASK
:
18080 case IX86_BUILTIN_PSRLWI512
:
18084 case IX86_BUILTIN_PSLLVV16HI
:
18085 case IX86_BUILTIN_PSLLVV16SI
:
18086 case IX86_BUILTIN_PSLLVV2DI
:
18087 case IX86_BUILTIN_PSLLVV2DI_MASK
:
18088 case IX86_BUILTIN_PSLLVV32HI
:
18089 case IX86_BUILTIN_PSLLVV4DI
:
18090 case IX86_BUILTIN_PSLLVV4DI_MASK
:
18091 case IX86_BUILTIN_PSLLVV4SI
:
18092 case IX86_BUILTIN_PSLLVV4SI_MASK
:
18093 case IX86_BUILTIN_PSLLVV8DI
:
18094 case IX86_BUILTIN_PSLLVV8HI
:
18095 case IX86_BUILTIN_PSLLVV8SI
:
18096 case IX86_BUILTIN_PSLLVV8SI_MASK
:
18100 case IX86_BUILTIN_PSRAVQ128
:
18101 case IX86_BUILTIN_PSRAVQ256
:
18102 case IX86_BUILTIN_PSRAVV16HI
:
18103 case IX86_BUILTIN_PSRAVV16SI
:
18104 case IX86_BUILTIN_PSRAVV32HI
:
18105 case IX86_BUILTIN_PSRAVV4SI
:
18106 case IX86_BUILTIN_PSRAVV4SI_MASK
:
18107 case IX86_BUILTIN_PSRAVV8DI
:
18108 case IX86_BUILTIN_PSRAVV8HI
:
18109 case IX86_BUILTIN_PSRAVV8SI
:
18110 case IX86_BUILTIN_PSRAVV8SI_MASK
:
18114 case IX86_BUILTIN_PSRLVV16HI
:
18115 case IX86_BUILTIN_PSRLVV16SI
:
18116 case IX86_BUILTIN_PSRLVV2DI
:
18117 case IX86_BUILTIN_PSRLVV2DI_MASK
:
18118 case IX86_BUILTIN_PSRLVV32HI
:
18119 case IX86_BUILTIN_PSRLVV4DI
:
18120 case IX86_BUILTIN_PSRLVV4DI_MASK
:
18121 case IX86_BUILTIN_PSRLVV4SI
:
18122 case IX86_BUILTIN_PSRLVV4SI_MASK
:
18123 case IX86_BUILTIN_PSRLVV8DI
:
18124 case IX86_BUILTIN_PSRLVV8HI
:
18125 case IX86_BUILTIN_PSRLVV8SI
:
18126 case IX86_BUILTIN_PSRLVV8SI_MASK
:
18132 gcc_assert (n_args
>= 2);
18133 arg0
= gimple_call_arg (stmt
, 0);
18134 arg1
= gimple_call_arg (stmt
, 1);
18137 /* This is masked shift. Only optimize if the mask is all ones. */
18138 tree argl
= gimple_call_arg (stmt
, n_args
- 1);
18139 if (!tree_fits_uhwi_p (argl
))
18141 unsigned HOST_WIDE_INT mask
= tree_to_uhwi (argl
);
18142 unsigned elems
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0
));
18143 if ((mask
| (HOST_WIDE_INT_M1U
<< elems
)) != HOST_WIDE_INT_M1U
)
18148 if (TREE_CODE (arg1
) != VECTOR_CST
)
18150 count
= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0
)));
18151 if (integer_zerop (arg1
))
18153 else if (rcode
== ASHIFTRT
)
18156 for (unsigned int i
= 0; i
< VECTOR_CST_NELTS (arg1
); ++i
)
18158 tree elt
= VECTOR_CST_ELT (arg1
, i
);
18159 if (!wi::neg_p (wi::to_wide (elt
))
18160 && wi::to_widest (elt
) < count
)
18166 arg1
= ix86_vector_shift_count (arg1
);
18169 count
= tree_to_uhwi (arg1
);
18173 /* Just return the first argument for shift by 0. */
18174 location_t loc
= gimple_location (stmt
);
18175 gimple
*g
= gimple_build_assign (gimple_call_lhs (stmt
), arg0
);
18176 gimple_set_location (g
, loc
);
18177 gsi_replace (gsi
, g
, false);
18180 if (rcode
!= ASHIFTRT
18181 && count
>= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0
))))
18183 /* For shift counts equal or greater than precision, except for
18184 arithmetic right shift the result is zero. */
18185 location_t loc
= gimple_location (stmt
);
18186 gimple
*g
= gimple_build_assign (gimple_call_lhs (stmt
),
18187 build_zero_cst (TREE_TYPE (arg0
)));
18188 gimple_set_location (g
, loc
);
18189 gsi_replace (gsi
, g
, false);
18194 case IX86_BUILTIN_SHUFPD
:
18195 arg2
= gimple_call_arg (stmt
, 2);
18196 if (TREE_CODE (arg2
) == INTEGER_CST
)
18198 location_t loc
= gimple_location (stmt
);
18199 unsigned HOST_WIDE_INT imask
= TREE_INT_CST_LOW (arg2
);
18200 arg0
= gimple_call_arg (stmt
, 0);
18201 arg1
= gimple_call_arg (stmt
, 1);
18202 tree itype
= long_long_integer_type_node
;
18203 tree vtype
= build_vector_type (itype
, 2); /* V2DI */
18204 tree_vector_builder
elts (vtype
, 2, 1);
18205 /* Ignore bits other than the lowest 2. */
18206 elts
.quick_push (build_int_cst (itype
, imask
& 1));
18208 elts
.quick_push (build_int_cst (itype
, 2 + (imask
& 1)));
18209 tree omask
= elts
.build ();
18210 gimple
*g
= gimple_build_assign (gimple_call_lhs (stmt
),
18212 arg0
, arg1
, omask
);
18213 gimple_set_location (g
, loc
);
18214 gsi_replace (gsi
, g
, false);
18217 // Do not error yet, the constant could be propagated later?
18227 /* Handler for an SVML-style interface to
18228 a library with vectorized intrinsics. */
18231 ix86_veclibabi_svml (combined_fn fn
, tree type_out
, tree type_in
)
18234 tree fntype
, new_fndecl
, args
;
18237 machine_mode el_mode
, in_mode
;
18240 /* The SVML is suitable for unsafe math only. */
18241 if (!flag_unsafe_math_optimizations
)
18244 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
18245 n
= TYPE_VECTOR_SUBPARTS (type_out
);
18246 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
18247 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
18248 if (el_mode
!= in_mode
18272 if ((el_mode
!= DFmode
|| n
!= 2)
18273 && (el_mode
!= SFmode
|| n
!= 4))
18281 tree fndecl
= mathfn_built_in (TREE_TYPE (type_in
), fn
);
18282 bname
= IDENTIFIER_POINTER (DECL_NAME (fndecl
));
18284 if (DECL_FUNCTION_CODE (fndecl
) == BUILT_IN_LOGF
)
18285 strcpy (name
, "vmlsLn4");
18286 else if (DECL_FUNCTION_CODE (fndecl
) == BUILT_IN_LOG
)
18287 strcpy (name
, "vmldLn2");
18290 sprintf (name
, "vmls%s", bname
+10);
18291 name
[strlen (name
)-1] = '4';
18294 sprintf (name
, "vmld%s2", bname
+10);
18296 /* Convert to uppercase. */
18300 for (args
= DECL_ARGUMENTS (fndecl
); args
; args
= TREE_CHAIN (args
))
18304 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
18306 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
18308 /* Build a function declaration for the vectorized function. */
18309 new_fndecl
= build_decl (BUILTINS_LOCATION
,
18310 FUNCTION_DECL
, get_identifier (name
), fntype
);
18311 TREE_PUBLIC (new_fndecl
) = 1;
18312 DECL_EXTERNAL (new_fndecl
) = 1;
18313 DECL_IS_NOVOPS (new_fndecl
) = 1;
18314 TREE_READONLY (new_fndecl
) = 1;
18319 /* Handler for an ACML-style interface to
18320 a library with vectorized intrinsics. */
18323 ix86_veclibabi_acml (combined_fn fn
, tree type_out
, tree type_in
)
18325 char name
[20] = "__vr.._";
18326 tree fntype
, new_fndecl
, args
;
18329 machine_mode el_mode
, in_mode
;
18332 /* The ACML is 64bits only and suitable for unsafe math only as
18333 it does not correctly support parts of IEEE with the required
18334 precision such as denormals. */
18336 || !flag_unsafe_math_optimizations
)
18339 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
18340 n
= TYPE_VECTOR_SUBPARTS (type_out
);
18341 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
18342 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
18343 if (el_mode
!= in_mode
18355 if (el_mode
== DFmode
&& n
== 2)
18360 else if (el_mode
== SFmode
&& n
== 4)
18373 tree fndecl
= mathfn_built_in (TREE_TYPE (type_in
), fn
);
18374 bname
= IDENTIFIER_POINTER (DECL_NAME (fndecl
));
18375 sprintf (name
+ 7, "%s", bname
+10);
18378 for (args
= DECL_ARGUMENTS (fndecl
); args
; args
= TREE_CHAIN (args
))
18382 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
18384 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
18386 /* Build a function declaration for the vectorized function. */
18387 new_fndecl
= build_decl (BUILTINS_LOCATION
,
18388 FUNCTION_DECL
, get_identifier (name
), fntype
);
18389 TREE_PUBLIC (new_fndecl
) = 1;
18390 DECL_EXTERNAL (new_fndecl
) = 1;
18391 DECL_IS_NOVOPS (new_fndecl
) = 1;
18392 TREE_READONLY (new_fndecl
) = 1;
18397 /* Returns a decl of a function that implements scatter store with
18398 register type VECTYPE and index type INDEX_TYPE and SCALE.
18399 Return NULL_TREE if it is not available. */
18402 ix86_vectorize_builtin_scatter (const_tree vectype
,
18403 const_tree index_type
, int scale
)
18406 enum ix86_builtins code
;
18408 if (!TARGET_AVX512F
)
18411 if ((TREE_CODE (index_type
) != INTEGER_TYPE
18412 && !POINTER_TYPE_P (index_type
))
18413 || (TYPE_MODE (index_type
) != SImode
18414 && TYPE_MODE (index_type
) != DImode
))
18417 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
18420 /* v*scatter* insn sign extends index to pointer mode. */
18421 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
18422 && TYPE_UNSIGNED (index_type
))
18425 /* Scale can be 1, 2, 4 or 8. */
18428 || (scale
& (scale
- 1)) != 0)
18431 si
= TYPE_MODE (index_type
) == SImode
;
18432 switch (TYPE_MODE (vectype
))
18435 code
= si
? IX86_BUILTIN_SCATTERALTSIV8DF
: IX86_BUILTIN_SCATTERDIV8DF
;
18438 code
= si
? IX86_BUILTIN_SCATTERALTSIV8DI
: IX86_BUILTIN_SCATTERDIV8DI
;
18441 code
= si
? IX86_BUILTIN_SCATTERSIV16SF
: IX86_BUILTIN_SCATTERALTDIV16SF
;
18444 code
= si
? IX86_BUILTIN_SCATTERSIV16SI
: IX86_BUILTIN_SCATTERALTDIV16SI
;
18447 if (TARGET_AVX512VL
)
18448 code
= si
? IX86_BUILTIN_SCATTERALTSIV4DF
: IX86_BUILTIN_SCATTERDIV4DF
;
18453 if (TARGET_AVX512VL
)
18454 code
= si
? IX86_BUILTIN_SCATTERALTSIV4DI
: IX86_BUILTIN_SCATTERDIV4DI
;
18459 if (TARGET_AVX512VL
)
18460 code
= si
? IX86_BUILTIN_SCATTERSIV8SF
: IX86_BUILTIN_SCATTERALTDIV8SF
;
18465 if (TARGET_AVX512VL
)
18466 code
= si
? IX86_BUILTIN_SCATTERSIV8SI
: IX86_BUILTIN_SCATTERALTDIV8SI
;
18471 if (TARGET_AVX512VL
)
18472 code
= si
? IX86_BUILTIN_SCATTERALTSIV2DF
: IX86_BUILTIN_SCATTERDIV2DF
;
18477 if (TARGET_AVX512VL
)
18478 code
= si
? IX86_BUILTIN_SCATTERALTSIV2DI
: IX86_BUILTIN_SCATTERDIV2DI
;
18483 if (TARGET_AVX512VL
)
18484 code
= si
? IX86_BUILTIN_SCATTERSIV4SF
: IX86_BUILTIN_SCATTERALTDIV4SF
;
18489 if (TARGET_AVX512VL
)
18490 code
= si
? IX86_BUILTIN_SCATTERSIV4SI
: IX86_BUILTIN_SCATTERALTDIV4SI
;
18498 return get_ix86_builtin (code
);
18501 /* Return true if it is safe to use the rsqrt optabs to optimize
18507 return (TARGET_SSE
&& TARGET_SSE_MATH
18508 && flag_finite_math_only
18509 && !flag_trapping_math
18510 && flag_unsafe_math_optimizations
);
18513 /* Helper for avx_vpermilps256_operand et al. This is also used by
18514 the expansion functions to turn the parallel back into a mask.
18515 The return value is 0 for no match and the imm8+1 for a match. */
18518 avx_vpermilp_parallel (rtx par
, machine_mode mode
)
18520 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
18522 unsigned char ipar
[16] = {}; /* Silence -Wuninitialized warning. */
18524 if (XVECLEN (par
, 0) != (int) nelt
)
18527 /* Validate that all of the elements are constants, and not totally
18528 out of range. Copy the data into an integral array to make the
18529 subsequent checks easier. */
18530 for (i
= 0; i
< nelt
; ++i
)
18532 rtx er
= XVECEXP (par
, 0, i
);
18533 unsigned HOST_WIDE_INT ei
;
18535 if (!CONST_INT_P (er
))
18546 /* In the 512-bit DFmode case, we can only move elements within
18547 a 128-bit lane. First fill the second part of the mask,
18549 for (i
= 4; i
< 6; ++i
)
18551 if (ipar
[i
] < 4 || ipar
[i
] >= 6)
18553 mask
|= (ipar
[i
] - 4) << i
;
18555 for (i
= 6; i
< 8; ++i
)
18559 mask
|= (ipar
[i
] - 6) << i
;
18564 /* In the 256-bit DFmode case, we can only move elements within
18566 for (i
= 0; i
< 2; ++i
)
18570 mask
|= ipar
[i
] << i
;
18572 for (i
= 2; i
< 4; ++i
)
18576 mask
|= (ipar
[i
] - 2) << i
;
18581 /* In 512 bit SFmode case, permutation in the upper 256 bits
18582 must mirror the permutation in the lower 256-bits. */
18583 for (i
= 0; i
< 8; ++i
)
18584 if (ipar
[i
] + 8 != ipar
[i
+ 8])
18589 /* In 256 bit SFmode case, we have full freedom of
18590 movement within the low 128-bit lane, but the high 128-bit
18591 lane must mirror the exact same pattern. */
18592 for (i
= 0; i
< 4; ++i
)
18593 if (ipar
[i
] + 4 != ipar
[i
+ 4])
18600 /* In the 128-bit case, we've full freedom in the placement of
18601 the elements from the source operand. */
18602 for (i
= 0; i
< nelt
; ++i
)
18603 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
18607 gcc_unreachable ();
18610 /* Make sure success has a non-zero value by adding one. */
18614 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
18615 the expansion functions to turn the parallel back into a mask.
18616 The return value is 0 for no match and the imm8+1 for a match. */
18619 avx_vperm2f128_parallel (rtx par
, machine_mode mode
)
18621 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
18623 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
18625 if (XVECLEN (par
, 0) != (int) nelt
)
18628 /* Validate that all of the elements are constants, and not totally
18629 out of range. Copy the data into an integral array to make the
18630 subsequent checks easier. */
18631 for (i
= 0; i
< nelt
; ++i
)
18633 rtx er
= XVECEXP (par
, 0, i
);
18634 unsigned HOST_WIDE_INT ei
;
18636 if (!CONST_INT_P (er
))
18639 if (ei
>= 2 * nelt
)
18644 /* Validate that the halves of the permute are halves. */
18645 for (i
= 0; i
< nelt2
- 1; ++i
)
18646 if (ipar
[i
] + 1 != ipar
[i
+ 1])
18648 for (i
= nelt2
; i
< nelt
- 1; ++i
)
18649 if (ipar
[i
] + 1 != ipar
[i
+ 1])
18652 /* Reconstruct the mask. */
18653 for (i
= 0; i
< 2; ++i
)
18655 unsigned e
= ipar
[i
* nelt2
];
18659 mask
|= e
<< (i
* 4);
18662 /* Make sure success has a non-zero value by adding one. */
18666 /* Return a register priority for hard reg REGNO. */
18668 ix86_register_priority (int hard_regno
)
18670 /* ebp and r13 as the base always wants a displacement, r12 as the
18671 base always wants an index. So discourage their usage in an
18673 if (hard_regno
== R12_REG
|| hard_regno
== R13_REG
)
18675 if (hard_regno
== BP_REG
)
18677 /* New x86-64 int registers result in bigger code size. Discourage
18679 if (IN_RANGE (hard_regno
, FIRST_REX_INT_REG
, LAST_REX_INT_REG
))
18681 /* New x86-64 SSE registers result in bigger code size. Discourage
18683 if (IN_RANGE (hard_regno
, FIRST_REX_SSE_REG
, LAST_REX_SSE_REG
))
18685 if (IN_RANGE (hard_regno
, FIRST_EXT_REX_SSE_REG
, LAST_EXT_REX_SSE_REG
))
18687 /* Usage of AX register results in smaller code. Prefer it. */
18688 if (hard_regno
== AX_REG
)
18693 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
18695 Put float CONST_DOUBLE in the constant pool instead of fp regs.
18696 QImode must go into class Q_REGS.
18697 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18698 movdf to do mem-to-mem moves through integer regs. */
18701 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
18703 machine_mode mode
= GET_MODE (x
);
18705 /* We're only allowed to return a subclass of CLASS. Many of the
18706 following checks fail for NO_REGS, so eliminate that early. */
18707 if (regclass
== NO_REGS
)
18710 /* All classes can load zeros. */
18711 if (x
== CONST0_RTX (mode
))
18714 /* Force constants into memory if we are loading a (nonzero) constant into
18715 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
18716 instructions to load from a constant. */
18718 && (MAYBE_MMX_CLASS_P (regclass
)
18719 || MAYBE_SSE_CLASS_P (regclass
)
18720 || MAYBE_MASK_CLASS_P (regclass
)))
18723 /* Floating-point constants need more complex checks. */
18724 if (CONST_DOUBLE_P (x
))
18726 /* General regs can load everything. */
18727 if (INTEGER_CLASS_P (regclass
))
18730 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18731 zero above. We only want to wind up preferring 80387 registers if
18732 we plan on doing computation with them. */
18733 if (IS_STACK_MODE (mode
)
18734 && standard_80387_constant_p (x
) > 0)
18736 /* Limit class to FP regs. */
18737 if (FLOAT_CLASS_P (regclass
))
18744 /* Prefer SSE regs only, if we can use them for math. */
18745 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
18746 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
18748 /* Generally when we see PLUS here, it's the function invariant
18749 (plus soft-fp const_int). Which can only be computed into general
18751 if (GET_CODE (x
) == PLUS
)
18752 return INTEGER_CLASS_P (regclass
) ? regclass
: NO_REGS
;
18754 /* QImode constants are easy to load, but non-constant QImode data
18755 must go into Q_REGS or ALL_MASK_REGS. */
18756 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
18758 if (Q_CLASS_P (regclass
))
18760 else if (reg_class_subset_p (Q_REGS
, regclass
))
18762 else if (MASK_CLASS_P (regclass
))
18771 /* Discourage putting floating-point values in SSE registers unless
18772 SSE math is being used, and likewise for the 387 registers. */
18774 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
18776 /* Restrict the output reload class to the register bank that we are doing
18777 math on. If we would like not to return a subset of CLASS, reject this
18778 alternative: if reload cannot do this, it will still use its choice. */
18779 machine_mode mode
= GET_MODE (x
);
18780 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
18781 return MAYBE_SSE_CLASS_P (regclass
) ? ALL_SSE_REGS
: NO_REGS
;
18783 if (IS_STACK_MODE (mode
))
18784 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
18790 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
18791 machine_mode mode
, secondary_reload_info
*sri
)
18793 /* Double-word spills from general registers to non-offsettable memory
18794 references (zero-extended addresses) require special handling. */
18797 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
18798 && INTEGER_CLASS_P (rclass
)
18799 && !offsettable_memref_p (x
))
18802 ? CODE_FOR_reload_noff_load
18803 : CODE_FOR_reload_noff_store
);
18804 /* Add the cost of moving address to a temporary. */
18805 sri
->extra_cost
= 1;
18810 /* QImode spills from non-QI registers require
18811 intermediate register on 32bit targets. */
18813 && ((!TARGET_64BIT
&& !in_p
18814 && INTEGER_CLASS_P (rclass
)
18815 && MAYBE_NON_Q_CLASS_P (rclass
))
18816 || (!TARGET_AVX512DQ
18817 && MAYBE_MASK_CLASS_P (rclass
))))
18819 int regno
= true_regnum (x
);
18821 /* Return Q_REGS if the operand is in memory. */
18828 /* This condition handles corner case where an expression involving
18829 pointers gets vectorized. We're trying to use the address of a
18830 stack slot as a vector initializer.
18832 (set (reg:V2DI 74 [ vect_cst_.2 ])
18833 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
18835 Eventually frame gets turned into sp+offset like this:
18837 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18838 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18839 (const_int 392 [0x188]))))
18841 That later gets turned into:
18843 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18844 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18845 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
18847 We'll have the following reload recorded:
18849 Reload 0: reload_in (DI) =
18850 (plus:DI (reg/f:DI 7 sp)
18851 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
18852 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18853 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
18854 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
18855 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18856 reload_reg_rtx: (reg:V2DI 22 xmm1)
18858 Which isn't going to work since SSE instructions can't handle scalar
18859 additions. Returning GENERAL_REGS forces the addition into integer
18860 register and reload can handle subsequent reloads without problems. */
18862 if (in_p
&& GET_CODE (x
) == PLUS
18863 && SSE_CLASS_P (rclass
)
18864 && SCALAR_INT_MODE_P (mode
))
18865 return GENERAL_REGS
;
18870 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
18873 ix86_class_likely_spilled_p (reg_class_t rclass
)
18884 case SSE_FIRST_REG
:
18886 case FP_SECOND_REG
:
18896 /* If we are copying between registers from different register sets
18897 (e.g. FP and integer), we may need a memory location.
18899 The function can't work reliably when one of the CLASSES is a class
18900 containing registers from multiple sets. We avoid this by never combining
18901 different sets in a single alternative in the machine description.
18902 Ensure that this constraint holds to avoid unexpected surprises.
18904 When STRICT is false, we are being called from REGISTER_MOVE_COST,
18905 so do not enforce these sanity checks.
18907 To optimize register_move_cost performance, define inline variant. */
18910 inline_secondary_memory_needed (machine_mode mode
, reg_class_t class1
,
18911 reg_class_t class2
, int strict
)
18913 if (lra_in_progress
&& (class1
== NO_REGS
|| class2
== NO_REGS
))
18916 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
18917 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
18918 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
18919 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
18920 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
18921 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
)
18922 || MAYBE_MASK_CLASS_P (class1
) != MASK_CLASS_P (class1
)
18923 || MAYBE_MASK_CLASS_P (class2
) != MASK_CLASS_P (class2
))
18925 gcc_assert (!strict
|| lra_in_progress
);
18929 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
18932 /* ??? This is a lie. We do have moves between mmx/general, and for
18933 mmx/sse2. But by saying we need secondary memory we discourage the
18934 register allocator from using the mmx registers unless needed. */
18935 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
18938 /* Between mask and general, we have moves no larger than word size. */
18939 if (MASK_CLASS_P (class1
) != MASK_CLASS_P (class2
))
18941 if (!(INTEGER_CLASS_P (class1
) || INTEGER_CLASS_P (class2
))
18942 || GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
18946 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18948 /* SSE1 doesn't have any direct moves from other classes. */
18952 /* Between SSE and general, we have moves no larger than word size. */
18953 if (!(INTEGER_CLASS_P (class1
) || INTEGER_CLASS_P (class2
))
18954 || GET_MODE_SIZE (mode
) < GET_MODE_SIZE (SImode
)
18955 || GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
18958 /* If the target says that inter-unit moves are more expensive
18959 than moving through memory, then don't generate them. */
18960 if ((SSE_CLASS_P (class1
) && !TARGET_INTER_UNIT_MOVES_FROM_VEC
)
18961 || (SSE_CLASS_P (class2
) && !TARGET_INTER_UNIT_MOVES_TO_VEC
))
18968 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
18971 ix86_secondary_memory_needed (machine_mode mode
, reg_class_t class1
,
18972 reg_class_t class2
)
18974 return inline_secondary_memory_needed (mode
, class1
, class2
, true);
18977 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
18979 get_secondary_mem widens integral modes to BITS_PER_WORD.
18980 There is no need to emit full 64 bit move on 64 bit targets
18981 for integral modes that can be moved using 32 bit move. */
18983 static machine_mode
18984 ix86_secondary_memory_needed_mode (machine_mode mode
)
18986 if (GET_MODE_BITSIZE (mode
) < 32 && INTEGRAL_MODE_P (mode
))
18987 return mode_for_size (32, GET_MODE_CLASS (mode
), 0).require ();
18991 /* Implement the TARGET_CLASS_MAX_NREGS hook.
18993 On the 80386, this is the size of MODE in words,
18994 except in the FP regs, where a single reg is always enough. */
18996 static unsigned char
18997 ix86_class_max_nregs (reg_class_t rclass
, machine_mode mode
)
18999 if (MAYBE_INTEGER_CLASS_P (rclass
))
19001 if (mode
== XFmode
)
19002 return (TARGET_64BIT
? 2 : 3);
19003 else if (mode
== XCmode
)
19004 return (TARGET_64BIT
? 4 : 6);
19006 return CEIL (GET_MODE_SIZE (mode
), UNITS_PER_WORD
);
19010 if (COMPLEX_MODE_P (mode
))
19017 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
19020 ix86_can_change_mode_class (machine_mode from
, machine_mode to
,
19021 reg_class_t regclass
)
19026 /* x87 registers can't do subreg at all, as all values are reformatted
19027 to extended precision. */
19028 if (MAYBE_FLOAT_CLASS_P (regclass
))
19031 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
19033 /* Vector registers do not support QI or HImode loads. If we don't
19034 disallow a change to these modes, reload will assume it's ok to
19035 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
19036 the vec_dupv4hi pattern. */
19037 if (GET_MODE_SIZE (from
) < 4)
19044 /* Return index of MODE in the sse load/store tables. */
19047 sse_store_index (machine_mode mode
)
19049 switch (GET_MODE_SIZE (mode
))
19066 /* Return the cost of moving data of mode M between a
19067 register and memory. A value of 2 is the default; this cost is
19068 relative to those in `REGISTER_MOVE_COST'.
19070 This function is used extensively by register_move_cost that is used to
19071 build tables at startup. Make it inline in this case.
19072 When IN is 2, return maximum of in and out move cost.
19074 If moving between registers and memory is more expensive than
19075 between two registers, you should define this macro to express the
19078 Model also increased moving costs of QImode registers in non
19082 inline_memory_move_cost (machine_mode mode
, enum reg_class regclass
, int in
)
19085 if (FLOAT_CLASS_P (regclass
))
19103 return MAX (ix86_cost
->hard_register
.fp_load
[index
],
19104 ix86_cost
->hard_register
.fp_store
[index
]);
19105 return in
? ix86_cost
->hard_register
.fp_load
[index
]
19106 : ix86_cost
->hard_register
.fp_store
[index
];
19108 if (SSE_CLASS_P (regclass
))
19110 int index
= sse_store_index (mode
);
19114 return MAX (ix86_cost
->hard_register
.sse_load
[index
],
19115 ix86_cost
->hard_register
.sse_store
[index
]);
19116 return in
? ix86_cost
->hard_register
.sse_load
[index
]
19117 : ix86_cost
->hard_register
.sse_store
[index
];
19119 if (MASK_CLASS_P (regclass
))
19122 switch (GET_MODE_SIZE (mode
))
19130 /* DImode loads and stores assumed to cost the same as SImode. */
19137 return MAX (ix86_cost
->hard_register
.mask_load
[index
],
19138 ix86_cost
->hard_register
.mask_store
[index
]);
19139 return in
? ix86_cost
->hard_register
.mask_load
[2]
19140 : ix86_cost
->hard_register
.mask_store
[2];
19142 if (MMX_CLASS_P (regclass
))
19145 switch (GET_MODE_SIZE (mode
))
19157 return MAX (ix86_cost
->hard_register
.mmx_load
[index
],
19158 ix86_cost
->hard_register
.mmx_store
[index
]);
19159 return in
? ix86_cost
->hard_register
.mmx_load
[index
]
19160 : ix86_cost
->hard_register
.mmx_store
[index
];
19162 switch (GET_MODE_SIZE (mode
))
19165 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
19168 return ix86_cost
->hard_register
.int_store
[0];
19169 if (TARGET_PARTIAL_REG_DEPENDENCY
19170 && optimize_function_for_speed_p (cfun
))
19171 cost
= ix86_cost
->hard_register
.movzbl_load
;
19173 cost
= ix86_cost
->hard_register
.int_load
[0];
19175 return MAX (cost
, ix86_cost
->hard_register
.int_store
[0]);
19181 return MAX (ix86_cost
->hard_register
.movzbl_load
,
19182 ix86_cost
->hard_register
.int_store
[0] + 4);
19184 return ix86_cost
->hard_register
.movzbl_load
;
19186 return ix86_cost
->hard_register
.int_store
[0] + 4;
19191 return MAX (ix86_cost
->hard_register
.int_load
[1],
19192 ix86_cost
->hard_register
.int_store
[1]);
19193 return in
? ix86_cost
->hard_register
.int_load
[1]
19194 : ix86_cost
->hard_register
.int_store
[1];
19197 cost
= MAX (ix86_cost
->hard_register
.int_load
[2],
19198 ix86_cost
->hard_register
.int_store
[2]);
19200 cost
= ix86_cost
->hard_register
.int_load
[2];
19202 cost
= ix86_cost
->hard_register
.int_store
[2];
19203 /* Multiply with the number of GPR moves needed. */
19204 return cost
* CEIL ((int) GET_MODE_SIZE (mode
), UNITS_PER_WORD
);
19209 ix86_memory_move_cost (machine_mode mode
, reg_class_t regclass
, bool in
)
19211 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
19215 /* Return the cost of moving data from a register in class CLASS1 to
19216 one in class CLASS2.
19218 It is not required that the cost always equal 2 when FROM is the same as TO;
19219 on some machines it is expensive to move between registers if they are not
19220 general registers. */
19223 ix86_register_move_cost (machine_mode mode
, reg_class_t class1_i
,
19224 reg_class_t class2_i
)
19226 enum reg_class class1
= (enum reg_class
) class1_i
;
19227 enum reg_class class2
= (enum reg_class
) class2_i
;
19229 /* In case we require secondary memory, compute cost of the store followed
19230 by load. In order to avoid bad register allocation choices, we need
19231 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
19233 if (inline_secondary_memory_needed (mode
, class1
, class2
, false))
19237 cost
+= inline_memory_move_cost (mode
, class1
, 2);
19238 cost
+= inline_memory_move_cost (mode
, class2
, 2);
19240 /* In case of copying from general_purpose_register we may emit multiple
19241 stores followed by single load causing memory size mismatch stall.
19242 Count this as arbitrarily high cost of 20. */
19243 if (GET_MODE_BITSIZE (mode
) > BITS_PER_WORD
19244 && TARGET_MEMORY_MISMATCH_STALL
19245 && targetm
.class_max_nregs (class1
, mode
)
19246 > targetm
.class_max_nregs (class2
, mode
))
19249 /* In the case of FP/MMX moves, the registers actually overlap, and we
19250 have to switch modes in order to treat them differently. */
19251 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
19252 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
19258 /* Moves between MMX and non-MMX units require secondary memory. */
19259 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
19260 gcc_unreachable ();
19262 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
19263 return (SSE_CLASS_P (class1
)
19264 ? ix86_cost
->hard_register
.sse_to_integer
19265 : ix86_cost
->hard_register
.integer_to_sse
);
19267 /* Moves between mask register and GPR. */
19268 if (MASK_CLASS_P (class1
) != MASK_CLASS_P (class2
))
19270 return (MASK_CLASS_P (class1
)
19271 ? ix86_cost
->hard_register
.mask_to_integer
19272 : ix86_cost
->hard_register
.integer_to_mask
);
19274 /* Moving between mask registers. */
19275 if (MASK_CLASS_P (class1
) && MASK_CLASS_P (class2
))
19276 return ix86_cost
->hard_register
.mask_move
;
19278 if (MAYBE_FLOAT_CLASS_P (class1
))
19279 return ix86_cost
->hard_register
.fp_move
;
19280 if (MAYBE_SSE_CLASS_P (class1
))
19282 if (GET_MODE_BITSIZE (mode
) <= 128)
19283 return ix86_cost
->hard_register
.xmm_move
;
19284 if (GET_MODE_BITSIZE (mode
) <= 256)
19285 return ix86_cost
->hard_register
.ymm_move
;
19286 return ix86_cost
->hard_register
.zmm_move
;
19288 if (MAYBE_MMX_CLASS_P (class1
))
19289 return ix86_cost
->hard_register
.mmx_move
;
19293 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
19294 words of a value of mode MODE but can be less for certain modes in
19295 special long registers.
19297 Actually there are no two word move instructions for consecutive
19298 registers. And only registers 0-3 may have mov byte instructions
19299 applied to them. */
19301 static unsigned int
19302 ix86_hard_regno_nregs (unsigned int regno
, machine_mode mode
)
19304 if (GENERAL_REGNO_P (regno
))
19306 if (mode
== XFmode
)
19307 return TARGET_64BIT
? 2 : 3;
19308 if (mode
== XCmode
)
19309 return TARGET_64BIT
? 4 : 6;
19310 return CEIL (GET_MODE_SIZE (mode
), UNITS_PER_WORD
);
19312 if (COMPLEX_MODE_P (mode
))
19314 /* Register pair for mask registers. */
19315 if (mode
== P2QImode
|| mode
== P2HImode
)
19317 if (mode
== V64SFmode
|| mode
== V64SImode
)
19322 /* Implement REGMODE_NATURAL_SIZE(MODE). */
19324 ix86_regmode_natural_size (machine_mode mode
)
19326 if (mode
== P2HImode
|| mode
== P2QImode
)
19327 return GET_MODE_SIZE (mode
) / 2;
19328 return UNITS_PER_WORD
;
19331 /* Implement TARGET_HARD_REGNO_MODE_OK. */
19334 ix86_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
19336 /* Flags and only flags can only hold CCmode values. */
19337 if (CC_REGNO_P (regno
))
19338 return GET_MODE_CLASS (mode
) == MODE_CC
;
19339 if (GET_MODE_CLASS (mode
) == MODE_CC
19340 || GET_MODE_CLASS (mode
) == MODE_RANDOM
)
19342 if (STACK_REGNO_P (regno
))
19343 return VALID_FP_MODE_P (mode
);
19344 if (MASK_REGNO_P (regno
))
19346 /* Register pair only starts at even register number. */
19347 if ((mode
== P2QImode
|| mode
== P2HImode
))
19348 return MASK_PAIR_REGNO_P(regno
);
19350 return ((TARGET_AVX512F
&& VALID_MASK_REG_MODE (mode
))
19351 || (TARGET_AVX512BW
19352 && VALID_MASK_AVX512BW_MODE (mode
)));
19355 if (GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
19358 if (SSE_REGNO_P (regno
))
19360 /* We implement the move patterns for all vector modes into and
19361 out of SSE registers, even when no operation instructions
19364 /* For AVX-512 we allow, regardless of regno:
19366 - any of 512-bit wide vector mode
19367 - any scalar mode. */
19370 || VALID_AVX512F_REG_MODE (mode
)
19371 || VALID_AVX512F_SCALAR_MODE (mode
)))
19374 /* For AVX-5124FMAPS or AVX-5124VNNIW
19375 allow V64SF and V64SI modes for special regnos. */
19376 if ((TARGET_AVX5124FMAPS
|| TARGET_AVX5124VNNIW
)
19377 && (mode
== V64SFmode
|| mode
== V64SImode
)
19378 && MOD4_SSE_REGNO_P (regno
))
19381 /* TODO check for QI/HI scalars. */
19382 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
19383 if (TARGET_AVX512VL
19386 || VALID_AVX256_REG_MODE (mode
)
19387 || VALID_AVX512VL_128_REG_MODE (mode
)))
19390 /* xmm16-xmm31 are only available for AVX-512. */
19391 if (EXT_REX_SSE_REGNO_P (regno
))
19394 /* OImode and AVX modes are available only when AVX is enabled. */
19395 return ((TARGET_AVX
19396 && VALID_AVX256_REG_OR_OI_MODE (mode
))
19397 || VALID_SSE_REG_MODE (mode
)
19398 || VALID_SSE2_REG_MODE (mode
)
19399 || VALID_MMX_REG_MODE (mode
)
19400 || VALID_MMX_REG_MODE_3DNOW (mode
));
19402 if (MMX_REGNO_P (regno
))
19404 /* We implement the move patterns for 3DNOW modes even in MMX mode,
19405 so if the register is available at all, then we can move data of
19406 the given mode into or out of it. */
19407 return (VALID_MMX_REG_MODE (mode
)
19408 || VALID_MMX_REG_MODE_3DNOW (mode
));
19411 if (mode
== QImode
)
19413 /* Take care for QImode values - they can be in non-QI regs,
19414 but then they do cause partial register stalls. */
19415 if (ANY_QI_REGNO_P (regno
))
19417 if (!TARGET_PARTIAL_REG_STALL
)
19419 /* LRA checks if the hard register is OK for the given mode.
19420 QImode values can live in non-QI regs, so we allow all
19422 if (lra_in_progress
)
19424 return !can_create_pseudo_p ();
19426 /* We handle both integer and floats in the general purpose registers. */
19427 else if (VALID_INT_MODE_P (mode
))
19429 else if (VALID_FP_MODE_P (mode
))
19431 else if (VALID_DFP_MODE_P (mode
))
19433 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
19434 on to use that value in smaller contexts, this can easily force a
19435 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
19436 supporting DImode, allow it. */
19437 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
19443 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
19444 saves SSE registers across calls is Win64 (thus no need to check the
19445 current ABI here), and with AVX enabled Win64 only guarantees that
19446 the low 16 bytes are saved. */
19449 ix86_hard_regno_call_part_clobbered (unsigned int, unsigned int regno
,
19452 return SSE_REGNO_P (regno
) && GET_MODE_SIZE (mode
) > 16;
19455 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
19456 tieable integer mode. */
19459 ix86_tieable_integer_mode_p (machine_mode mode
)
19468 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
19471 return TARGET_64BIT
;
19478 /* Implement TARGET_MODES_TIEABLE_P.
19480 Return true if MODE1 is accessible in a register that can hold MODE2
19481 without copying. That is, all register classes that can hold MODE2
19482 can also hold MODE1. */
19485 ix86_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
19487 if (mode1
== mode2
)
19490 if (ix86_tieable_integer_mode_p (mode1
)
19491 && ix86_tieable_integer_mode_p (mode2
))
19494 /* MODE2 being XFmode implies fp stack or general regs, which means we
19495 can tie any smaller floating point modes to it. Note that we do not
19496 tie this with TFmode. */
19497 if (mode2
== XFmode
)
19498 return mode1
== SFmode
|| mode1
== DFmode
;
19500 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
19501 that we can tie it with SFmode. */
19502 if (mode2
== DFmode
)
19503 return mode1
== SFmode
;
19505 /* If MODE2 is only appropriate for an SSE register, then tie with
19506 any other mode acceptable to SSE registers. */
19507 if (GET_MODE_SIZE (mode2
) == 64
19508 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
19509 return (GET_MODE_SIZE (mode1
) == 64
19510 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
19511 if (GET_MODE_SIZE (mode2
) == 32
19512 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
19513 return (GET_MODE_SIZE (mode1
) == 32
19514 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
19515 if (GET_MODE_SIZE (mode2
) == 16
19516 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
19517 return (GET_MODE_SIZE (mode1
) == 16
19518 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
19520 /* If MODE2 is appropriate for an MMX register, then tie
19521 with any other mode acceptable to MMX registers. */
19522 if (GET_MODE_SIZE (mode2
) == 8
19523 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
19524 return (GET_MODE_SIZE (mode1
) == 8
19525 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
19530 /* Return the cost of moving between two registers of mode MODE. */
19533 ix86_set_reg_reg_cost (machine_mode mode
)
19535 unsigned int units
= UNITS_PER_WORD
;
19537 switch (GET_MODE_CLASS (mode
))
19543 units
= GET_MODE_SIZE (CCmode
);
19547 if ((TARGET_SSE
&& mode
== TFmode
)
19548 || (TARGET_80387
&& mode
== XFmode
)
19549 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DFmode
)
19550 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SFmode
))
19551 units
= GET_MODE_SIZE (mode
);
19554 case MODE_COMPLEX_FLOAT
:
19555 if ((TARGET_SSE
&& mode
== TCmode
)
19556 || (TARGET_80387
&& mode
== XCmode
)
19557 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DCmode
)
19558 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SCmode
))
19559 units
= GET_MODE_SIZE (mode
);
19562 case MODE_VECTOR_INT
:
19563 case MODE_VECTOR_FLOAT
:
19564 if ((TARGET_AVX512F
&& VALID_AVX512F_REG_MODE (mode
))
19565 || (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
19566 || (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
19567 || (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
19568 || ((TARGET_MMX
|| TARGET_MMX_WITH_SSE
)
19569 && VALID_MMX_REG_MODE (mode
)))
19570 units
= GET_MODE_SIZE (mode
);
19573 /* Return the cost of moving between two registers of mode MODE,
19574 assuming that the move will be in pieces of at most UNITS bytes. */
19575 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode
), units
));
19578 /* Return cost of vector operation in MODE given that scalar version has
19582 ix86_vec_cost (machine_mode mode
, int cost
)
19584 if (!VECTOR_MODE_P (mode
))
19587 if (GET_MODE_BITSIZE (mode
) == 128
19588 && TARGET_SSE_SPLIT_REGS
)
19590 if (GET_MODE_BITSIZE (mode
) > 128
19591 && TARGET_AVX256_SPLIT_REGS
)
19592 return cost
* GET_MODE_BITSIZE (mode
) / 128;
19596 /* Return cost of multiplication in MODE. */
19599 ix86_multiplication_cost (const struct processor_costs
*cost
,
19600 enum machine_mode mode
)
19602 machine_mode inner_mode
= mode
;
19603 if (VECTOR_MODE_P (mode
))
19604 inner_mode
= GET_MODE_INNER (mode
);
19606 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19607 return inner_mode
== DFmode
? cost
->mulsd
: cost
->mulss
;
19608 else if (X87_FLOAT_MODE_P (mode
))
19610 else if (FLOAT_MODE_P (mode
))
19611 return ix86_vec_cost (mode
,
19612 inner_mode
== DFmode
? cost
->mulsd
: cost
->mulss
);
19613 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
19615 /* vpmullq is used in this case. No emulation is needed. */
19616 if (TARGET_AVX512DQ
)
19617 return ix86_vec_cost (mode
, cost
->mulss
);
19619 /* V*QImode is emulated with 7-13 insns. */
19620 if (mode
== V16QImode
|| mode
== V32QImode
)
19623 if (TARGET_XOP
&& mode
== V16QImode
)
19625 else if (TARGET_SSSE3
)
19627 return ix86_vec_cost (mode
, cost
->mulss
* 2 + cost
->sse_op
* extra
);
19629 /* V*DImode is emulated with 5-8 insns. */
19630 else if (mode
== V2DImode
|| mode
== V4DImode
)
19632 if (TARGET_XOP
&& mode
== V2DImode
)
19633 return ix86_vec_cost (mode
, cost
->mulss
* 2 + cost
->sse_op
* 3);
19635 return ix86_vec_cost (mode
, cost
->mulss
* 3 + cost
->sse_op
* 5);
19637 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
19638 insns, including two PMULUDQ. */
19639 else if (mode
== V4SImode
&& !(TARGET_SSE4_1
|| TARGET_AVX
))
19640 return ix86_vec_cost (mode
, cost
->mulss
* 2 + cost
->sse_op
* 5);
19642 return ix86_vec_cost (mode
, cost
->mulss
);
19645 return (cost
->mult_init
[MODE_INDEX (mode
)] + cost
->mult_bit
* 7);
19648 /* Return cost of multiplication in MODE. */
19651 ix86_division_cost (const struct processor_costs
*cost
,
19652 enum machine_mode mode
)
19654 machine_mode inner_mode
= mode
;
19655 if (VECTOR_MODE_P (mode
))
19656 inner_mode
= GET_MODE_INNER (mode
);
19658 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19659 return inner_mode
== DFmode
? cost
->divsd
: cost
->divss
;
19660 else if (X87_FLOAT_MODE_P (mode
))
19662 else if (FLOAT_MODE_P (mode
))
19663 return ix86_vec_cost (mode
,
19664 inner_mode
== DFmode
? cost
->divsd
: cost
->divss
);
19666 return cost
->divide
[MODE_INDEX (mode
)];
19669 #define COSTS_N_BYTES(N) ((N) * 2)
19671 /* Return cost of shift in MODE.
19672 If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
19673 AND_IN_OP1 specify in op1 is result of and and SHIFT_AND_TRUNCATE
19674 if op1 is a result of subreg.
19676 SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */
19679 ix86_shift_rotate_cost (const struct processor_costs
*cost
,
19680 enum machine_mode mode
, bool constant_op1
,
19681 HOST_WIDE_INT op1_val
,
19684 bool shift_and_truncate
,
19685 bool *skip_op0
, bool *skip_op1
)
19688 *skip_op0
= *skip_op1
= false;
19689 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
19691 /* V*QImode is emulated with 1-11 insns. */
19692 if (mode
== V16QImode
|| mode
== V32QImode
)
19695 if (TARGET_XOP
&& mode
== V16QImode
)
19697 /* For XOP we use vpshab, which requires a broadcast of the
19698 value to the variable shift insn. For constants this
19699 means a V16Q const in mem; even when we can perform the
19700 shift with one insn set the cost to prefer paddb. */
19705 return ix86_vec_cost (mode
,
19710 (GET_MODE_UNIT_SIZE (mode
))));
19714 else if (TARGET_SSSE3
)
19716 return ix86_vec_cost (mode
, cost
->sse_op
* count
);
19719 return ix86_vec_cost (mode
, cost
->sse_op
);
19721 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
19726 return cost
->shift_const
+ COSTS_N_INSNS (2);
19728 return cost
->shift_const
* 2;
19733 return cost
->shift_var
* 2;
19735 return cost
->shift_var
* 6 + COSTS_N_INSNS (2);
19741 return cost
->shift_const
;
19742 else if (shift_and_truncate
)
19745 *skip_op0
= *skip_op1
= true;
19746 /* Return the cost after shift-and truncation. */
19747 return cost
->shift_var
;
19750 return cost
->shift_var
;
19752 return cost
->shift_const
;
19755 /* Compute a (partial) cost for rtx X. Return true if the complete
19756 cost has been computed, and false if subexpressions should be
19757 scanned. In either case, *TOTAL contains the cost result. */
19760 ix86_rtx_costs (rtx x
, machine_mode mode
, int outer_code_i
, int opno
,
19761 int *total
, bool speed
)
19764 enum rtx_code code
= GET_CODE (x
);
19765 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
19766 const struct processor_costs
*cost
19767 = speed
? ix86_tune_cost
: &ix86_size_cost
;
19773 if (register_operand (SET_DEST (x
), VOIDmode
)
19774 && register_operand (SET_SRC (x
), VOIDmode
))
19776 *total
= ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x
)));
19780 if (register_operand (SET_SRC (x
), VOIDmode
))
19781 /* Avoid potentially incorrect high cost from rtx_costs
19782 for non-tieable SUBREGs. */
19786 src_cost
= rtx_cost (SET_SRC (x
), mode
, SET
, 1, speed
);
19788 if (CONSTANT_P (SET_SRC (x
)))
19789 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
19790 a small value, possibly zero for cheap constants. */
19791 src_cost
+= COSTS_N_INSNS (1);
19794 *total
= src_cost
+ rtx_cost (SET_DEST (x
), mode
, SET
, 0, speed
);
19801 if (x86_64_immediate_operand (x
, VOIDmode
))
19808 if (IS_STACK_MODE (mode
))
19809 switch (standard_80387_constant_p (x
))
19817 default: /* Other constants */
19824 switch (standard_sse_constant_p (x
, mode
))
19828 case 1: /* 0: xor eliminates false dependency */
19831 default: /* -1: cmp contains false dependency */
19837 case CONST_WIDE_INT
:
19838 /* Fall back to (MEM (SYMBOL_REF)), since that's where
19839 it'll probably end up. Add a penalty for size. */
19840 *total
= (COSTS_N_INSNS (1)
19841 + (!TARGET_64BIT
&& flag_pic
)
19842 + (GET_MODE_SIZE (mode
) <= 4
19843 ? 0 : GET_MODE_SIZE (mode
) <= 8 ? 1 : 2));
19847 /* The zero extensions is often completely free on x86_64, so make
19848 it as cheap as possible. */
19849 if (TARGET_64BIT
&& mode
== DImode
19850 && GET_MODE (XEXP (x
, 0)) == SImode
)
19852 else if (TARGET_ZERO_EXTEND_WITH_AND
)
19853 *total
= cost
->add
;
19855 *total
= cost
->movzx
;
19859 *total
= cost
->movsx
;
19863 if (SCALAR_INT_MODE_P (mode
)
19864 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
19865 && CONST_INT_P (XEXP (x
, 1)))
19867 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19870 *total
= cost
->add
;
19873 if ((value
== 2 || value
== 3)
19874 && cost
->lea
<= cost
->shift_const
)
19876 *total
= cost
->lea
;
19886 bool skip_op0
, skip_op1
;
19887 *total
= ix86_shift_rotate_cost (cost
, mode
, CONSTANT_P (XEXP (x
, 1)),
19888 CONST_INT_P (XEXP (x
, 1))
19889 ? INTVAL (XEXP (x
, 1)) : -1,
19891 GET_CODE (XEXP (x
, 1)) == AND
,
19892 SUBREG_P (XEXP (x
, 1))
19893 && GET_CODE (XEXP (XEXP (x
, 1), 0)) == AND
,
19894 &skip_op0
, &skip_op1
);
19895 if (skip_op0
|| skip_op1
)
19898 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
19900 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed
);
19909 gcc_assert (FLOAT_MODE_P (mode
));
19910 gcc_assert (TARGET_FMA
|| TARGET_FMA4
|| TARGET_AVX512F
);
19912 *total
= ix86_vec_cost (mode
,
19913 GET_MODE_INNER (mode
) == SFmode
19914 ? cost
->fmass
: cost
->fmasd
);
19915 *total
+= rtx_cost (XEXP (x
, 1), mode
, FMA
, 1, speed
);
19917 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
19919 if (GET_CODE (sub
) == NEG
)
19920 sub
= XEXP (sub
, 0);
19921 *total
+= rtx_cost (sub
, mode
, FMA
, 0, speed
);
19924 if (GET_CODE (sub
) == NEG
)
19925 sub
= XEXP (sub
, 0);
19926 *total
+= rtx_cost (sub
, mode
, FMA
, 2, speed
);
19931 if (!FLOAT_MODE_P (mode
) && !VECTOR_MODE_P (mode
))
19933 rtx op0
= XEXP (x
, 0);
19934 rtx op1
= XEXP (x
, 1);
19936 if (CONST_INT_P (XEXP (x
, 1)))
19938 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19939 for (nbits
= 0; value
!= 0; value
&= value
- 1)
19943 /* This is arbitrary. */
19946 /* Compute costs correctly for widening multiplication. */
19947 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
19948 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
19949 == GET_MODE_SIZE (mode
))
19951 int is_mulwiden
= 0;
19952 machine_mode inner_mode
= GET_MODE (op0
);
19954 if (GET_CODE (op0
) == GET_CODE (op1
))
19955 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
19956 else if (CONST_INT_P (op1
))
19958 if (GET_CODE (op0
) == SIGN_EXTEND
)
19959 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
19962 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
19966 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
19969 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
19970 + nbits
* cost
->mult_bit
19971 + rtx_cost (op0
, mode
, outer_code
, opno
, speed
)
19972 + rtx_cost (op1
, mode
, outer_code
, opno
, speed
));
19976 *total
= ix86_multiplication_cost (cost
, mode
);
19983 *total
= ix86_division_cost (cost
, mode
);
19987 if (GET_MODE_CLASS (mode
) == MODE_INT
19988 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
)
19990 if (GET_CODE (XEXP (x
, 0)) == PLUS
19991 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
19992 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
19993 && CONSTANT_P (XEXP (x
, 1)))
19995 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
19996 if (val
== 2 || val
== 4 || val
== 8)
19998 *total
= cost
->lea
;
19999 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), mode
,
20000 outer_code
, opno
, speed
);
20001 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
20002 outer_code
, opno
, speed
);
20003 *total
+= rtx_cost (XEXP (x
, 1), mode
,
20004 outer_code
, opno
, speed
);
20008 else if (GET_CODE (XEXP (x
, 0)) == MULT
20009 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
20011 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
20012 if (val
== 2 || val
== 4 || val
== 8)
20014 *total
= cost
->lea
;
20015 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
20016 outer_code
, opno
, speed
);
20017 *total
+= rtx_cost (XEXP (x
, 1), mode
,
20018 outer_code
, opno
, speed
);
20022 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
20024 /* Add with carry, ignore the cost of adding a carry flag. */
20025 if (ix86_carry_flag_operator (XEXP (XEXP (x
, 0), 0), mode
))
20026 *total
= cost
->add
;
20029 *total
= cost
->lea
;
20030 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
20031 outer_code
, opno
, speed
);
20034 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), mode
,
20035 outer_code
, opno
, speed
);
20036 *total
+= rtx_cost (XEXP (x
, 1), mode
,
20037 outer_code
, opno
, speed
);
20044 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
20045 if (GET_MODE_CLASS (mode
) == MODE_INT
20046 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
20047 && GET_CODE (XEXP (x
, 0)) == MINUS
20048 && ix86_carry_flag_operator (XEXP (XEXP (x
, 0), 1), mode
))
20050 *total
= cost
->add
;
20051 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
20052 outer_code
, opno
, speed
);
20053 *total
+= rtx_cost (XEXP (x
, 1), mode
,
20054 outer_code
, opno
, speed
);
20058 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
20060 *total
= cost
->addss
;
20063 else if (X87_FLOAT_MODE_P (mode
))
20065 *total
= cost
->fadd
;
20068 else if (FLOAT_MODE_P (mode
))
20070 *total
= ix86_vec_cost (mode
, cost
->addss
);
20078 if (GET_MODE_CLASS (mode
) == MODE_INT
20079 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
20081 *total
= (cost
->add
* 2
20082 + (rtx_cost (XEXP (x
, 0), mode
, outer_code
, opno
, speed
)
20083 << (GET_MODE (XEXP (x
, 0)) != DImode
))
20084 + (rtx_cost (XEXP (x
, 1), mode
, outer_code
, opno
, speed
)
20085 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
20091 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
20093 *total
= cost
->sse_op
;
20096 else if (X87_FLOAT_MODE_P (mode
))
20098 *total
= cost
->fchs
;
20101 else if (FLOAT_MODE_P (mode
))
20103 *total
= ix86_vec_cost (mode
, cost
->sse_op
);
20109 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
20110 *total
= ix86_vec_cost (mode
, cost
->sse_op
);
20111 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
20112 *total
= cost
->add
* 2;
20114 *total
= cost
->add
;
20121 if (GET_CODE (op0
) == ZERO_EXTRACT
20122 && XEXP (op0
, 1) == const1_rtx
20123 && CONST_INT_P (XEXP (op0
, 2))
20124 && op1
== const0_rtx
)
20126 /* This kind of construct is implemented using test[bwl].
20127 Treat it as if we had an AND. */
20128 mode
= GET_MODE (XEXP (op0
, 0));
20129 *total
= (cost
->add
20130 + rtx_cost (XEXP (op0
, 0), mode
, outer_code
,
20132 + rtx_cost (const1_rtx
, mode
, outer_code
, opno
, speed
));
20136 if (GET_CODE (op0
) == PLUS
&& rtx_equal_p (XEXP (op0
, 0), op1
))
20138 /* This is an overflow detection, count it as a normal compare. */
20139 *total
= rtx_cost (op0
, GET_MODE (op0
), COMPARE
, 0, speed
);
20145 (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
20146 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */
20147 if (mode
== CCCmode
20148 && GET_CODE (op0
) == NEG
20149 && GET_CODE (geu
= XEXP (op0
, 0)) == GEU
20150 && REG_P (XEXP (geu
, 0))
20151 && (GET_MODE (XEXP (geu
, 0)) == CCCmode
20152 || GET_MODE (XEXP (geu
, 0)) == CCmode
)
20153 && REGNO (XEXP (geu
, 0)) == FLAGS_REG
20154 && XEXP (geu
, 1) == const0_rtx
20155 && GET_CODE (op1
) == LTU
20156 && REG_P (XEXP (op1
, 0))
20157 && GET_MODE (XEXP (op1
, 0)) == GET_MODE (XEXP (geu
, 0))
20158 && REGNO (XEXP (op1
, 0)) == FLAGS_REG
20159 && XEXP (op1
, 1) == const0_rtx
)
20161 /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */
20166 /* The embedded comparison operand is completely free. */
20167 if (!general_operand (op0
, GET_MODE (op0
)) && op1
== const0_rtx
)
20173 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
20176 *total
= ix86_vec_cost (mode
, cost
->addss
);
20179 case FLOAT_TRUNCATE
:
20180 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
20181 *total
= cost
->fadd
;
20183 *total
= ix86_vec_cost (mode
, cost
->addss
);
20187 /* SSE requires memory load for the constant operand. It may make
20188 sense to account for this. Of course the constant operand may or
20189 may not be reused. */
20190 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
20191 *total
= cost
->sse_op
;
20192 else if (X87_FLOAT_MODE_P (mode
))
20193 *total
= cost
->fabs
;
20194 else if (FLOAT_MODE_P (mode
))
20195 *total
= ix86_vec_cost (mode
, cost
->sse_op
);
20199 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
20200 *total
= mode
== SFmode
? cost
->sqrtss
: cost
->sqrtsd
;
20201 else if (X87_FLOAT_MODE_P (mode
))
20202 *total
= cost
->fsqrt
;
20203 else if (FLOAT_MODE_P (mode
))
20204 *total
= ix86_vec_cost (mode
,
20205 mode
== SFmode
? cost
->sqrtss
: cost
->sqrtsd
);
20209 if (XINT (x
, 1) == UNSPEC_TP
)
20215 case VEC_DUPLICATE
:
20216 /* ??? Assume all of these vector manipulation patterns are
20217 recognizable. In which case they all pretty much have the
20219 *total
= cost
->sse_op
;
20222 mask
= XEXP (x
, 2);
20223 /* This is masked instruction, assume the same cost,
20224 as nonmasked variant. */
20225 if (TARGET_AVX512F
&& register_operand (mask
, GET_MODE (mask
)))
20226 *total
= rtx_cost (XEXP (x
, 0), mode
, outer_code
, opno
, speed
);
20228 *total
= cost
->sse_op
;
20238 static int current_machopic_label_num
;
20240 /* Given a symbol name and its associated stub, write out the
20241 definition of the stub. */
20244 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
20246 unsigned int length
;
20247 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
20248 int label
= ++current_machopic_label_num
;
20250 /* For 64-bit we shouldn't get here. */
20251 gcc_assert (!TARGET_64BIT
);
20253 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20254 symb
= targetm
.strip_name_encoding (symb
);
20256 length
= strlen (stub
);
20257 binder_name
= XALLOCAVEC (char, length
+ 32);
20258 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
20260 length
= strlen (symb
);
20261 symbol_name
= XALLOCAVEC (char, length
+ 32);
20262 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
20264 sprintf (lazy_ptr_name
, "L%d$lz", label
);
20266 if (MACHOPIC_ATT_STUB
)
20267 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
20268 else if (MACHOPIC_PURE
)
20269 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
20271 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
20273 fprintf (file
, "%s:\n", stub
);
20274 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20276 if (MACHOPIC_ATT_STUB
)
20278 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
20280 else if (MACHOPIC_PURE
)
20283 /* 25-byte PIC stub using "CALL get_pc_thunk". */
20284 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
20285 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
20286 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
20287 label
, lazy_ptr_name
, label
);
20288 fprintf (file
, "\tjmp\t*%%ecx\n");
20291 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
20293 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
20294 it needs no stub-binding-helper. */
20295 if (MACHOPIC_ATT_STUB
)
20298 fprintf (file
, "%s:\n", binder_name
);
20302 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
20303 fprintf (file
, "\tpushl\t%%ecx\n");
20306 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
20308 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
20310 /* N.B. Keep the correspondence of these
20311 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
20312 old-pic/new-pic/non-pic stubs; altering this will break
20313 compatibility with existing dylibs. */
20316 /* 25-byte PIC stub using "CALL get_pc_thunk". */
20317 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
20320 /* 16-byte -mdynamic-no-pic stub. */
20321 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
20323 fprintf (file
, "%s:\n", lazy_ptr_name
);
20324 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20325 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
20327 #endif /* TARGET_MACHO */
20329 /* Order the registers for register allocator. */
20332 x86_order_regs_for_local_alloc (void)
20337 /* First allocate the local general purpose registers. */
20338 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
20339 if (GENERAL_REGNO_P (i
) && call_used_or_fixed_reg_p (i
))
20340 reg_alloc_order
[pos
++] = i
;
20342 /* Global general purpose registers. */
20343 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
20344 if (GENERAL_REGNO_P (i
) && !call_used_or_fixed_reg_p (i
))
20345 reg_alloc_order
[pos
++] = i
;
20347 /* x87 registers come first in case we are doing FP math
20349 if (!TARGET_SSE_MATH
)
20350 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
20351 reg_alloc_order
[pos
++] = i
;
20353 /* SSE registers. */
20354 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
20355 reg_alloc_order
[pos
++] = i
;
20356 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
20357 reg_alloc_order
[pos
++] = i
;
20359 /* Extended REX SSE registers. */
20360 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
20361 reg_alloc_order
[pos
++] = i
;
20363 /* Mask register. */
20364 for (i
= FIRST_MASK_REG
; i
<= LAST_MASK_REG
; i
++)
20365 reg_alloc_order
[pos
++] = i
;
20367 /* x87 registers. */
20368 if (TARGET_SSE_MATH
)
20369 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
20370 reg_alloc_order
[pos
++] = i
;
20372 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
20373 reg_alloc_order
[pos
++] = i
;
20375 /* Initialize the rest of array as we do not allocate some registers
20377 while (pos
< FIRST_PSEUDO_REGISTER
)
20378 reg_alloc_order
[pos
++] = 0;
20382 ix86_ms_bitfield_layout_p (const_tree record_type
)
20384 return ((TARGET_MS_BITFIELD_LAYOUT
20385 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
20386 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
20389 /* Returns an expression indicating where the this parameter is
20390 located on entry to the FUNCTION. */
20393 x86_this_parameter (tree function
)
20395 tree type
= TREE_TYPE (function
);
20396 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
20401 const int *parm_regs
;
20403 if (ix86_function_type_abi (type
) == MS_ABI
)
20404 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
20406 parm_regs
= x86_64_int_parameter_registers
;
20407 return gen_rtx_REG (Pmode
, parm_regs
[aggr
]);
20410 nregs
= ix86_function_regparm (type
, function
);
20412 if (nregs
> 0 && !stdarg_p (type
))
20415 unsigned int ccvt
= ix86_get_callcvt (type
);
20417 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
20418 regno
= aggr
? DX_REG
: CX_REG
;
20419 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
20423 return gen_rtx_MEM (SImode
,
20424 plus_constant (Pmode
, stack_pointer_rtx
, 4));
20433 return gen_rtx_MEM (SImode
,
20434 plus_constant (Pmode
,
20435 stack_pointer_rtx
, 4));
20438 return gen_rtx_REG (SImode
, regno
);
20441 return gen_rtx_MEM (SImode
, plus_constant (Pmode
, stack_pointer_rtx
,
20445 /* Determine whether x86_output_mi_thunk can succeed. */
20448 x86_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
20449 const_tree function
)
20451 /* 64-bit can handle anything. */
20455 /* For 32-bit, everything's fine if we have one free register. */
20456 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
20459 /* Need a free register for vcall_offset. */
20463 /* Need a free register for GOT references. */
20464 if (flag_pic
&& !targetm
.binds_local_p (function
))
20467 /* Otherwise ok. */
20471 /* Output the assembler code for a thunk function. THUNK_DECL is the
20472 declaration for the thunk function itself, FUNCTION is the decl for
20473 the target function. DELTA is an immediate constant offset to be
20474 added to THIS. If VCALL_OFFSET is nonzero, the word at
20475 *(*this + vcall_offset) should be added to THIS. */
20478 x86_output_mi_thunk (FILE *file
, tree thunk_fndecl
, HOST_WIDE_INT delta
,
20479 HOST_WIDE_INT vcall_offset
, tree function
)
20481 const char *fnname
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl
));
20482 rtx this_param
= x86_this_parameter (function
);
20483 rtx this_reg
, tmp
, fnaddr
;
20484 unsigned int tmp_regno
;
20488 tmp_regno
= R10_REG
;
20491 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
20492 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
20493 tmp_regno
= AX_REG
;
20494 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
20495 tmp_regno
= DX_REG
;
20497 tmp_regno
= CX_REG
;
20500 emit_note (NOTE_INSN_PROLOGUE_END
);
20502 /* CET is enabled, insert EB instruction. */
20503 if ((flag_cf_protection
& CF_BRANCH
))
20504 emit_insn (gen_nop_endbr ());
20506 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
20507 pull it in now and let DELTA benefit. */
20508 if (REG_P (this_param
))
20509 this_reg
= this_param
;
20510 else if (vcall_offset
)
20512 /* Put the this parameter into %eax. */
20513 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
20514 emit_move_insn (this_reg
, this_param
);
20517 this_reg
= NULL_RTX
;
20519 /* Adjust the this parameter by a fixed constant. */
20522 rtx delta_rtx
= GEN_INT (delta
);
20523 rtx delta_dst
= this_reg
? this_reg
: this_param
;
20527 if (!x86_64_general_operand (delta_rtx
, Pmode
))
20529 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
20530 emit_move_insn (tmp
, delta_rtx
);
20535 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
20538 /* Adjust the this parameter by a value stored in the vtable. */
20541 rtx vcall_addr
, vcall_mem
, this_mem
;
20543 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
20545 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
20546 if (Pmode
!= ptr_mode
)
20547 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
20548 emit_move_insn (tmp
, this_mem
);
20550 /* Adjust the this parameter. */
20551 vcall_addr
= plus_constant (Pmode
, tmp
, vcall_offset
);
20553 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
20555 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
20556 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
20557 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
20560 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
20561 if (Pmode
!= ptr_mode
)
20562 emit_insn (gen_addsi_1_zext (this_reg
,
20563 gen_rtx_REG (ptr_mode
,
20567 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
20570 /* If necessary, drop THIS back to its stack slot. */
20571 if (this_reg
&& this_reg
!= this_param
)
20572 emit_move_insn (this_param
, this_reg
);
20574 fnaddr
= XEXP (DECL_RTL (function
), 0);
20577 if (!flag_pic
|| targetm
.binds_local_p (function
)
20582 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
20583 tmp
= gen_rtx_CONST (Pmode
, tmp
);
20584 fnaddr
= gen_const_mem (Pmode
, tmp
);
20589 if (!flag_pic
|| targetm
.binds_local_p (function
))
20592 else if (TARGET_MACHO
)
20594 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
20595 fnaddr
= XEXP (fnaddr
, 0);
20597 #endif /* TARGET_MACHO */
20600 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
20601 output_set_got (tmp
, NULL_RTX
);
20603 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
20604 fnaddr
= gen_rtx_CONST (Pmode
, fnaddr
);
20605 fnaddr
= gen_rtx_PLUS (Pmode
, tmp
, fnaddr
);
20606 fnaddr
= gen_const_mem (Pmode
, fnaddr
);
20610 /* Our sibling call patterns do not allow memories, because we have no
20611 predicate that can distinguish between frame and non-frame memory.
20612 For our purposes here, we can get away with (ab)using a jump pattern,
20613 because we're going to do no optimization. */
20614 if (MEM_P (fnaddr
))
20616 if (sibcall_insn_operand (fnaddr
, word_mode
))
20618 fnaddr
= XEXP (DECL_RTL (function
), 0);
20619 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
20620 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
20621 tmp
= emit_call_insn (tmp
);
20622 SIBLING_CALL_P (tmp
) = 1;
20625 emit_jump_insn (gen_indirect_jump (fnaddr
));
20629 if (ix86_cmodel
== CM_LARGE_PIC
&& SYMBOLIC_CONST (fnaddr
))
20631 // CM_LARGE_PIC always uses pseudo PIC register which is
20632 // uninitialized. Since FUNCTION is local and calling it
20633 // doesn't go through PLT, we use scratch register %r11 as
20634 // PIC register and initialize it here.
20635 pic_offset_table_rtx
= gen_rtx_REG (Pmode
, R11_REG
);
20636 ix86_init_large_pic_reg (tmp_regno
);
20637 fnaddr
= legitimize_pic_address (fnaddr
,
20638 gen_rtx_REG (Pmode
, tmp_regno
));
20641 if (!sibcall_insn_operand (fnaddr
, word_mode
))
20643 tmp
= gen_rtx_REG (word_mode
, tmp_regno
);
20644 if (GET_MODE (fnaddr
) != word_mode
)
20645 fnaddr
= gen_rtx_ZERO_EXTEND (word_mode
, fnaddr
);
20646 emit_move_insn (tmp
, fnaddr
);
20650 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
20651 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
20652 tmp
= emit_call_insn (tmp
);
20653 SIBLING_CALL_P (tmp
) = 1;
20657 /* Emit just enough of rest_of_compilation to get the insns emitted. */
20658 insn
= get_insns ();
20659 shorten_branches (insn
);
20660 assemble_start_function (thunk_fndecl
, fnname
);
20661 final_start_function (insn
, file
, 1);
20662 final (insn
, file
, 1);
20663 final_end_function ();
20664 assemble_end_function (thunk_fndecl
, fnname
);
20668 x86_file_start (void)
20670 default_file_start ();
20672 fputs ("\t.code16gcc\n", asm_out_file
);
20674 darwin_file_start ();
20676 if (X86_FILE_START_VERSION_DIRECTIVE
)
20677 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
20678 if (X86_FILE_START_FLTUSED
)
20679 fputs ("\t.global\t__fltused\n", asm_out_file
);
20680 if (ix86_asm_dialect
== ASM_INTEL
)
20681 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
20685 x86_field_alignment (tree type
, int computed
)
20689 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
20692 return iamcu_alignment (type
, computed
);
20693 type
= strip_array_types (type
);
20694 mode
= TYPE_MODE (type
);
20695 if (mode
== DFmode
|| mode
== DCmode
20696 || GET_MODE_CLASS (mode
) == MODE_INT
20697 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
20699 if (TYPE_ATOMIC (type
) && computed
> 32)
20701 static bool warned
;
20703 if (!warned
&& warn_psabi
)
20706 = CHANGES_ROOT_URL
"gcc-11/changes.html#ia32_atomic";
20709 inform (input_location
, "the alignment of %<_Atomic %T%> "
20710 "fields changed in %{GCC 11.1%}",
20711 TYPE_MAIN_VARIANT (type
), url
);
20715 return MIN (32, computed
);
20720 /* Print call to TARGET to FILE. */
20723 x86_print_call_or_nop (FILE *file
, const char *target
)
20725 if (flag_nop_mcount
|| !strcmp (target
, "nop"))
20726 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
20727 fprintf (file
, "1:" ASM_BYTE
"0x0f, 0x1f, 0x44, 0x00, 0x00\n");
20729 fprintf (file
, "1:\tcall\t%s\n", target
);
20733 current_fentry_name (const char **name
)
20735 tree attr
= lookup_attribute ("fentry_name",
20736 DECL_ATTRIBUTES (current_function_decl
));
20739 *name
= TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr
)));
20744 current_fentry_section (const char **name
)
20746 tree attr
= lookup_attribute ("fentry_section",
20747 DECL_ATTRIBUTES (current_function_decl
));
20750 *name
= TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr
)));
20754 /* Output assembler code to FILE to increment profiler label # LABELNO
20755 for profiling a function entry. */
20757 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
20759 if (cfun
->machine
->insn_queued_at_entrance
)
20761 if (cfun
->machine
->insn_queued_at_entrance
== TYPE_ENDBR
)
20762 fprintf (file
, "\t%s\n", TARGET_64BIT
? "endbr64" : "endbr32");
20763 unsigned int patch_area_size
20764 = crtl
->patch_area_size
- crtl
->patch_area_entry
;
20765 if (patch_area_size
)
20766 ix86_output_patchable_area (patch_area_size
,
20767 crtl
->patch_area_entry
== 0);
20770 const char *mcount_name
= MCOUNT_NAME
;
20772 if (current_fentry_name (&mcount_name
))
20774 else if (fentry_name
)
20775 mcount_name
= fentry_name
;
20776 else if (flag_fentry
)
20777 mcount_name
= MCOUNT_NAME_BEFORE_PROLOGUE
;
20781 #ifndef NO_PROFILE_COUNTERS
20782 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
20785 if (!TARGET_PECOFF
&& flag_pic
)
20786 fprintf (file
, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
20788 x86_print_call_or_nop (file
, mcount_name
);
20792 #ifndef NO_PROFILE_COUNTERS
20793 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
20796 fprintf (file
, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
20800 #ifndef NO_PROFILE_COUNTERS
20801 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
20804 x86_print_call_or_nop (file
, mcount_name
);
20807 if (flag_record_mcount
20808 || lookup_attribute ("fentry_section",
20809 DECL_ATTRIBUTES (current_function_decl
)))
20811 const char *sname
= "__mcount_loc";
20813 if (current_fentry_section (&sname
))
20815 else if (fentry_section
)
20816 sname
= fentry_section
;
20818 fprintf (file
, "\t.section %s, \"a\",@progbits\n", sname
);
20819 fprintf (file
, "\t.%s 1b\n", TARGET_64BIT
? "quad" : "long");
20820 fprintf (file
, "\t.previous\n");
20824 /* We don't have exact information about the insn sizes, but we may assume
20825 quite safely that we are informed about all 1 byte insns and memory
20826 address sizes. This is enough to eliminate unnecessary padding in
20830 ix86_min_insn_size (rtx_insn
*insn
)
20834 if (!INSN_P (insn
) || !active_insn_p (insn
))
20837 /* Discard alignments we've emit and jump instructions. */
20838 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
20839 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
20842 /* Important case - calls are always 5 bytes.
20843 It is common to have many calls in the row. */
20845 && symbolic_reference_mentioned_p (PATTERN (insn
))
20846 && !SIBLING_CALL_P (insn
))
20848 len
= get_attr_length (insn
);
20852 /* For normal instructions we rely on get_attr_length being exact,
20853 with a few exceptions. */
20854 if (!JUMP_P (insn
))
20856 enum attr_type type
= get_attr_type (insn
);
20861 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
20862 || asm_noperands (PATTERN (insn
)) >= 0)
20869 /* Otherwise trust get_attr_length. */
20873 l
= get_attr_length_address (insn
);
20874 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  Insert padding so at most 3 branches share a 16-byte page.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx_insn *insn, *start = get_insns ();
  int nbytes = 0, njumps = 0;
  bool isjump = false;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).

     Don't consider asm goto as jump, while it can contain a jump, it doesn't
     have to, control transfer to label(s) can be performed through other
     means, and also we estimate minimum length of all asm stmts as 0.  */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  align_flags alignment = label_to_alignment (insn);
	  int align = alignment.levels[0].log;
	  int max_skip = alignment.levels[0].maxskip;

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
		      || CALL_P (start))
		    njumps--, isjump = true;
		  else
		    isjump = false;
		  nbytes -= ix86_min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = ix86_min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
	      || CALL_P (start))
	    njumps--, isjump = true;
	  else
	    isjump = false;
	  nbytes -= ix86_min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + ix86_min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
#endif
20985 /* AMD Athlon works faster
20986 when RET is not destination of conditional jump or directly preceded
20987 by other jump instruction. We avoid the penalty by inserting NOP just
20988 before the RET instructions in such cases. */
20990 ix86_pad_returns (void)
20995 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20997 basic_block bb
= e
->src
;
20998 rtx_insn
*ret
= BB_END (bb
);
21000 bool replace
= false;
21002 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
21003 || optimize_bb_for_size_p (bb
))
21005 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
21006 if (active_insn_p (prev
) || LABEL_P (prev
))
21008 if (prev
&& LABEL_P (prev
))
21013 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
21014 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
21015 && !(e
->flags
& EDGE_FALLTHRU
))
21023 prev
= prev_active_insn (ret
);
21025 && ((JUMP_P (prev
) && any_condjump_p (prev
))
21028 /* Empty functions get branch mispredict even when
21029 the jump destination is not visible to us. */
21030 if (!prev
&& !optimize_function_for_size_p (cfun
))
21035 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
21041 /* Count the minimum number of instructions in BB. Return 4 if the
21042 number of instructions >= 4. */
21045 ix86_count_insn_bb (basic_block bb
)
21048 int insn_count
= 0;
21050 /* Count number of instructions in this block. Return 4 if the number
21051 of instructions >= 4. */
21052 FOR_BB_INSNS (bb
, insn
)
21054 /* Only happen in exit blocks. */
21056 && ANY_RETURN_P (PATTERN (insn
)))
21059 if (NONDEBUG_INSN_P (insn
)
21060 && GET_CODE (PATTERN (insn
)) != USE
21061 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
21064 if (insn_count
>= 4)
21073 /* Count the minimum number of instructions in code path in BB.
21074 Return 4 if the number of instructions >= 4. */
21077 ix86_count_insn (basic_block bb
)
21081 int min_prev_count
;
21083 /* Only bother counting instructions along paths with no
21084 more than 2 basic blocks between entry and exit. Given
21085 that BB has an edge to exit, determine if a predecessor
21086 of BB has an edge from entry. If so, compute the number
21087 of instructions in the predecessor block. If there
21088 happen to be multiple such blocks, compute the minimum. */
21089 min_prev_count
= 4;
21090 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
21093 edge_iterator prev_ei
;
21095 if (e
->src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
))
21097 min_prev_count
= 0;
21100 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
21102 if (prev_e
->src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
))
21104 int count
= ix86_count_insn_bb (e
->src
);
21105 if (count
< min_prev_count
)
21106 min_prev_count
= count
;
21112 if (min_prev_count
< 4)
21113 min_prev_count
+= ix86_count_insn_bb (bb
);
21115 return min_prev_count
;
21118 /* Pad short function to 4 instructions. */
21121 ix86_pad_short_function (void)
21126 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
21128 rtx_insn
*ret
= BB_END (e
->src
);
21129 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
21131 int insn_count
= ix86_count_insn (e
->src
);
21133 /* Pad short function. */
21134 if (insn_count
< 4)
21136 rtx_insn
*insn
= ret
;
21138 /* Find epilogue. */
21141 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
21142 insn
= PREV_INSN (insn
);
21147 /* Two NOPs count as one instruction. */
21148 insn_count
= 2 * (4 - insn_count
);
21149 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
21155 /* Fix up a Windows system unwinder issue. If an EH region falls through into
21156 the epilogue, the Windows system unwinder will apply epilogue logic and
21157 produce incorrect offsets. This can be avoided by adding a nop between
21158 the last insn that can throw and the first insn of the epilogue. */
21161 ix86_seh_fixup_eh_fallthru (void)
21166 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
21168 rtx_insn
*insn
, *next
;
21170 /* Find the beginning of the epilogue. */
21171 for (insn
= BB_END (e
->src
); insn
!= NULL
; insn
= PREV_INSN (insn
))
21172 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_EPILOGUE_BEG
)
21177 /* We only care about preceding insns that can throw. */
21178 insn
= prev_active_insn (insn
);
21179 if (insn
== NULL
|| !can_throw_internal (insn
))
21182 /* Do not separate calls from their debug information. */
21183 for (next
= NEXT_INSN (insn
); next
!= NULL
; next
= NEXT_INSN (next
))
21184 if (NOTE_P (next
) && NOTE_KIND (next
) == NOTE_INSN_VAR_LOCATION
)
21189 emit_insn_after (gen_nops (const1_rtx
), insn
);
21193 /* Implement machine specific optimizations. We implement padding of returns
21194 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
21198 /* We are freeing block_for_insn in the toplev to keep compatibility
21199 with old MDEP_REORGS that are not CFG based. Recompute it now. */
21200 compute_bb_for_insn ();
21202 if (TARGET_SEH
&& current_function_has_exception_handlers ())
21203 ix86_seh_fixup_eh_fallthru ();
21205 if (optimize
&& optimize_function_for_speed_p (cfun
))
21207 if (TARGET_PAD_SHORT_FUNCTION
)
21208 ix86_pad_short_function ();
21209 else if (TARGET_PAD_RETURNS
)
21210 ix86_pad_returns ();
21211 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
21212 if (TARGET_FOUR_JUMP_LIMIT
)
21213 ix86_avoid_jump_mispredicts ();
21218 /* Return nonzero when QImode register that must be represented via REX prefix
21221 x86_extended_QIreg_mentioned_p (rtx_insn
*insn
)
21224 extract_insn_cached (insn
);
21225 for (i
= 0; i
< recog_data
.n_operands
; i
++)
21226 if (GENERAL_REG_P (recog_data
.operand
[i
])
21227 && !QI_REGNO_P (REGNO (recog_data
.operand
[i
])))
21232 /* Return true when INSN mentions register that must be encoded using REX
21235 x86_extended_reg_mentioned_p (rtx insn
)
21237 subrtx_iterator::array_type array
;
21238 FOR_EACH_SUBRTX (iter
, array
, INSN_P (insn
) ? PATTERN (insn
) : insn
, NONCONST
)
21240 const_rtx x
= *iter
;
21242 && (REX_INT_REGNO_P (REGNO (x
)) || REX_SSE_REGNO_P (REGNO (x
))))
21248 /* If profitable, negate (without causing overflow) integer constant
21249 of mode MODE at location LOC. Return true in this case. */
21251 x86_maybe_negate_const_int (rtx
*loc
, machine_mode mode
)
21255 if (!CONST_INT_P (*loc
))
21261 /* DImode x86_64 constants must fit in 32 bits. */
21262 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
21273 gcc_unreachable ();
21276 /* Avoid overflows. */
21277 if (mode_signbit_p (mode
, *loc
))
21280 val
= INTVAL (*loc
);
21282 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
21283 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
21284 if ((val
< 0 && val
!= -128)
21287 *loc
= GEN_INT (-val
);
21294 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
21295 optabs would emit if we didn't have TFmode patterns. */
21298 x86_emit_floatuns (rtx operands
[2])
21300 rtx_code_label
*neglab
, *donelab
;
21301 rtx i0
, i1
, f0
, in
, out
;
21302 machine_mode mode
, inmode
;
21304 inmode
= GET_MODE (operands
[1]);
21305 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
21308 in
= force_reg (inmode
, operands
[1]);
21309 mode
= GET_MODE (out
);
21310 neglab
= gen_label_rtx ();
21311 donelab
= gen_label_rtx ();
21312 f0
= gen_reg_rtx (mode
);
21314 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
21316 expand_float (out
, in
, 0);
21318 emit_jump_insn (gen_jump (donelab
));
21321 emit_label (neglab
);
21323 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
21325 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
21327 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
21329 expand_float (f0
, i0
, 0);
21331 emit_insn (gen_rtx_SET (out
, gen_rtx_PLUS (mode
, f0
, f0
)));
21333 emit_label (donelab
);
21336 /* Target hook for scalar_mode_supported_p. */
21338 ix86_scalar_mode_supported_p (scalar_mode mode
)
21340 if (DECIMAL_FLOAT_MODE_P (mode
))
21341 return default_decimal_float_supported_p ();
21342 else if (mode
== TFmode
)
21345 return default_scalar_mode_supported_p (mode
);
21348 /* Implements target hook vector_mode_supported_p. */
21350 ix86_vector_mode_supported_p (machine_mode mode
)
21352 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
21354 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
21356 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
21358 if (TARGET_AVX512F
&& VALID_AVX512F_REG_MODE (mode
))
21360 if ((TARGET_MMX
|| TARGET_MMX_WITH_SSE
)
21361 && VALID_MMX_REG_MODE (mode
))
21363 if ((TARGET_3DNOW
|| TARGET_MMX_WITH_SSE
)
21364 && VALID_MMX_REG_MODE_3DNOW (mode
))
21369 /* Target hook for c_mode_for_suffix. */
21370 static machine_mode
21371 ix86_c_mode_for_suffix (char suffix
)
21381 /* Worker function for TARGET_MD_ASM_ADJUST.
21383 We implement asm flag outputs, and maintain source compatibility
21384 with the old cc0-based compiler. */
21387 ix86_md_asm_adjust (vec
<rtx
> &outputs
, vec
<rtx
> &/*inputs*/,
21388 vec
<const char *> &constraints
,
21389 vec
<rtx
> &clobbers
, HARD_REG_SET
&clobbered_regs
)
21391 bool saw_asm_flag
= false;
21394 for (unsigned i
= 0, n
= outputs
.length (); i
< n
; ++i
)
21396 const char *con
= constraints
[i
];
21397 if (strncmp (con
, "=@cc", 4) != 0)
21400 if (strchr (con
, ',') != NULL
)
21402 error ("alternatives not allowed in %<asm%> flag output");
21406 bool invert
= false;
21408 invert
= true, con
++;
21410 machine_mode mode
= CCmode
;
21411 rtx_code code
= UNKNOWN
;
21417 mode
= CCAmode
, code
= EQ
;
21418 else if (con
[1] == 'e' && con
[2] == 0)
21419 mode
= CCCmode
, code
= NE
;
21423 mode
= CCCmode
, code
= EQ
;
21424 else if (con
[1] == 'e' && con
[2] == 0)
21425 mode
= CCAmode
, code
= NE
;
21429 mode
= CCCmode
, code
= EQ
;
21433 mode
= CCZmode
, code
= EQ
;
21437 mode
= CCGCmode
, code
= GT
;
21438 else if (con
[1] == 'e' && con
[2] == 0)
21439 mode
= CCGCmode
, code
= GE
;
21443 mode
= CCGCmode
, code
= LT
;
21444 else if (con
[1] == 'e' && con
[2] == 0)
21445 mode
= CCGCmode
, code
= LE
;
21449 mode
= CCOmode
, code
= EQ
;
21453 mode
= CCPmode
, code
= EQ
;
21457 mode
= CCSmode
, code
= EQ
;
21461 mode
= CCZmode
, code
= EQ
;
21464 if (code
== UNKNOWN
)
21466 error ("unknown %<asm%> flag output %qs", constraints
[i
]);
21470 code
= reverse_condition (code
);
21472 rtx dest
= outputs
[i
];
21475 /* This is the first asm flag output. Here we put the flags
21476 register in as the real output and adjust the condition to
21478 constraints
[i
] = "=Bf";
21479 outputs
[i
] = gen_rtx_REG (CCmode
, FLAGS_REG
);
21480 saw_asm_flag
= true;
21484 /* We don't need the flags register as output twice. */
21485 constraints
[i
] = "=X";
21486 outputs
[i
] = gen_rtx_SCRATCH (SImode
);
21489 rtx x
= gen_rtx_REG (mode
, FLAGS_REG
);
21490 x
= gen_rtx_fmt_ee (code
, QImode
, x
, const0_rtx
);
21492 machine_mode dest_mode
= GET_MODE (dest
);
21493 if (!SCALAR_INT_MODE_P (dest_mode
))
21495 error ("invalid type for %<asm%> flag output");
21499 if (dest_mode
== DImode
&& !TARGET_64BIT
)
21500 dest_mode
= SImode
;
21502 if (dest_mode
!= QImode
)
21504 rtx destqi
= gen_reg_rtx (QImode
);
21505 emit_insn (gen_rtx_SET (destqi
, x
));
21507 if (TARGET_ZERO_EXTEND_WITH_AND
21508 && optimize_function_for_speed_p (cfun
))
21510 x
= force_reg (dest_mode
, const0_rtx
);
21512 emit_insn (gen_movstrictqi (gen_lowpart (QImode
, x
), destqi
));
21516 x
= gen_rtx_ZERO_EXTEND (dest_mode
, destqi
);
21517 if (dest_mode
== GET_MODE (dest
)
21518 && !register_operand (dest
, GET_MODE (dest
)))
21519 x
= force_reg (dest_mode
, x
);
21523 if (dest_mode
!= GET_MODE (dest
))
21525 rtx tmp
= gen_reg_rtx (SImode
);
21527 emit_insn (gen_rtx_SET (tmp
, x
));
21528 emit_insn (gen_zero_extendsidi2 (dest
, tmp
));
21531 emit_insn (gen_rtx_SET (dest
, x
));
21533 rtx_insn
*seq
= get_insns ();
21540 /* If we had no asm flag outputs, clobber the flags. */
21541 clobbers
.safe_push (gen_rtx_REG (CCmode
, FLAGS_REG
));
21542 SET_HARD_REG_BIT (clobbered_regs
, FLAGS_REG
);
21547 /* Implements target vector targetm.asm.encode_section_info. */
21549 static void ATTRIBUTE_UNUSED
21550 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
21552 default_encode_section_info (decl
, rtl
, first
);
21554 if (ix86_in_large_data_p (decl
))
21555 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
21558 /* Worker function for REVERSE_CONDITION. */
21561 ix86_reverse_condition (enum rtx_code code
, machine_mode mode
)
21563 return (mode
== CCFPmode
21564 ? reverse_condition_maybe_unordered (code
)
21565 : reverse_condition (code
));
21568 /* Output code to perform an x87 FP register move, from OPERANDS[1]
21572 output_387_reg_move (rtx_insn
*insn
, rtx
*operands
)
21574 if (REG_P (operands
[0]))
21576 if (REG_P (operands
[1])
21577 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
21579 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
21580 return output_387_ffreep (operands
, 0);
21581 return "fstp\t%y0";
21583 if (STACK_TOP_P (operands
[0]))
21584 return "fld%Z1\t%y1";
21587 else if (MEM_P (operands
[0]))
21589 gcc_assert (REG_P (operands
[1]));
21590 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
21591 return "fstp%Z0\t%y0";
21594 /* There is no non-popping store to memory for XFmode.
21595 So if we need one, follow the store with a load. */
21596 if (GET_MODE (operands
[0]) == XFmode
)
21597 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
21599 return "fst%Z0\t%y0";
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#ifndef USE_GAS
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
     SPARC assembler.  One cannot mix single-letter flags and #exclude, so
     only emit the latter here.  */
  if (flags & SECTION_EXCLUDE)
    {
      fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
21644 /* Return the mangling of TYPE if it is an extended fundamental type. */
21646 static const char *
21647 ix86_mangle_type (const_tree type
)
21649 type
= TYPE_MAIN_VARIANT (type
);
21651 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
21652 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
21655 switch (TYPE_MODE (type
))
21658 /* __float128 is "g". */
21661 /* "long double" or __float80 is "e". */
21668 static GTY(()) tree ix86_tls_stack_chk_guard_decl
;
21671 ix86_stack_protect_guard (void)
21673 if (TARGET_SSP_TLS_GUARD
)
21675 tree type_node
= lang_hooks
.types
.type_for_mode (ptr_mode
, 1);
21676 int qual
= ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg
);
21677 tree type
= build_qualified_type (type_node
, qual
);
21680 if (global_options_set
.x_ix86_stack_protector_guard_symbol_str
)
21682 t
= ix86_tls_stack_chk_guard_decl
;
21689 (UNKNOWN_LOCATION
, VAR_DECL
,
21690 get_identifier (ix86_stack_protector_guard_symbol_str
),
21692 TREE_STATIC (t
) = 1;
21693 TREE_PUBLIC (t
) = 1;
21694 DECL_EXTERNAL (t
) = 1;
21696 TREE_THIS_VOLATILE (t
) = 1;
21697 DECL_ARTIFICIAL (t
) = 1;
21698 DECL_IGNORED_P (t
) = 1;
21700 /* Do not share RTL as the declaration is visible outside of
21701 current function. */
21703 RTX_FLAG (x
, used
) = 1;
21705 ix86_tls_stack_chk_guard_decl
= t
;
21710 tree asptrtype
= build_pointer_type (type
);
21712 t
= build_int_cst (asptrtype
, ix86_stack_protector_guard_offset
);
21713 t
= build2 (MEM_REF
, asptrtype
, t
,
21714 build_int_cst (asptrtype
, 0));
21715 TREE_THIS_VOLATILE (t
) = 1;
21721 return default_stack_protect_guard ();
21724 /* For 32-bit code we can save PIC register setup by using
21725 __stack_chk_fail_local hidden function instead of calling
21726 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
21727 register, so it is better to call __stack_chk_fail directly. */
21729 static tree ATTRIBUTE_UNUSED
21730 ix86_stack_protect_fail (void)
21732 return TARGET_64BIT
21733 ? default_external_stack_protect_fail ()
21734 : default_hidden_stack_protect_fail ();
21737 /* Select a format to encode pointers in exception handling data. CODE
21738 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21739 true if the symbol may be affected by dynamic relocations.
21741 ??? All x86 object file formats are capable of representing this.
21742 After all, the relocation needed is the same as for the call insn.
21743 Whether or not a particular assembler allows us to enter such, I
21744 guess we'll have to see. */
21746 asm_preferred_eh_data_format (int code
, int global
)
21750 int type
= DW_EH_PE_sdata8
;
21752 || ix86_cmodel
== CM_SMALL_PIC
21753 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
21754 type
= DW_EH_PE_sdata4
;
21755 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
21757 if (ix86_cmodel
== CM_SMALL
21758 || (ix86_cmodel
== CM_MEDIUM
&& code
))
21759 return DW_EH_PE_udata4
;
21760 return DW_EH_PE_absptr
;
21763 /* Implement targetm.vectorize.builtin_vectorization_cost. */
21765 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
21769 machine_mode mode
= TImode
;
21771 if (vectype
!= NULL
)
21773 fp
= FLOAT_TYPE_P (vectype
);
21774 mode
= TYPE_MODE (vectype
);
21777 switch (type_of_cost
)
21780 return fp
? ix86_cost
->addss
: COSTS_N_INSNS (1);
21783 /* load/store costs are relative to register move which is 2. Recompute
21784 it to COSTS_N_INSNS so everything have same base. */
21785 return COSTS_N_INSNS (fp
? ix86_cost
->sse_load
[0]
21786 : ix86_cost
->int_load
[2]) / 2;
21789 return COSTS_N_INSNS (fp
? ix86_cost
->sse_store
[0]
21790 : ix86_cost
->int_store
[2]) / 2;
21793 return ix86_vec_cost (mode
,
21794 fp
? ix86_cost
->addss
: ix86_cost
->sse_op
);
21797 index
= sse_store_index (mode
);
21798 /* See PR82713 - we may end up being called on non-vector type. */
21801 return COSTS_N_INSNS (ix86_cost
->sse_load
[index
]) / 2;
21804 index
= sse_store_index (mode
);
21805 /* See PR82713 - we may end up being called on non-vector type. */
21808 return COSTS_N_INSNS (ix86_cost
->sse_store
[index
]) / 2;
21810 case vec_to_scalar
:
21811 case scalar_to_vec
:
21812 return ix86_vec_cost (mode
, ix86_cost
->sse_op
);
21814 /* We should have separate costs for unaligned loads and gather/scatter.
21815 Do that incrementally. */
21816 case unaligned_load
:
21817 index
= sse_store_index (mode
);
21818 /* See PR82713 - we may end up being called on non-vector type. */
21821 return COSTS_N_INSNS (ix86_cost
->sse_unaligned_load
[index
]) / 2;
21823 case unaligned_store
:
21824 index
= sse_store_index (mode
);
21825 /* See PR82713 - we may end up being called on non-vector type. */
21828 return COSTS_N_INSNS (ix86_cost
->sse_unaligned_store
[index
]) / 2;
21830 case vector_gather_load
:
21831 return ix86_vec_cost (mode
,
21833 (ix86_cost
->gather_static
21834 + ix86_cost
->gather_per_elt
21835 * TYPE_VECTOR_SUBPARTS (vectype
)) / 2);
21837 case vector_scatter_store
:
21838 return ix86_vec_cost (mode
,
21840 (ix86_cost
->scatter_static
21841 + ix86_cost
->scatter_per_elt
21842 * TYPE_VECTOR_SUBPARTS (vectype
)) / 2);
21844 case cond_branch_taken
:
21845 return ix86_cost
->cond_taken_branch_cost
;
21847 case cond_branch_not_taken
:
21848 return ix86_cost
->cond_not_taken_branch_cost
;
21851 case vec_promote_demote
:
21852 return ix86_vec_cost (mode
, ix86_cost
->sse_op
);
21854 case vec_construct
:
21856 /* N element inserts into SSE vectors. */
21857 int cost
= TYPE_VECTOR_SUBPARTS (vectype
) * ix86_cost
->sse_op
;
21858 /* One vinserti128 for combining two SSE vectors for AVX256. */
21859 if (GET_MODE_BITSIZE (mode
) == 256)
21860 cost
+= ix86_vec_cost (mode
, ix86_cost
->addss
);
21861 /* One vinserti64x4 and two vinserti128 for combining SSE
21862 and AVX256 vectors to AVX512. */
21863 else if (GET_MODE_BITSIZE (mode
) == 512)
21864 cost
+= 3 * ix86_vec_cost (mode
, ix86_cost
->addss
);
21869 gcc_unreachable ();
21874 /* This function returns the calling abi specific va_list type node.
21875 It returns the FNDECL specific va_list type. */
21878 ix86_fn_abi_va_list (tree fndecl
)
21881 return va_list_type_node
;
21882 gcc_assert (fndecl
!= NULL_TREE
);
21884 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
21885 return ms_va_list_type_node
;
21887 return sysv_va_list_type_node
;
21890 /* Returns the canonical va_list type specified by TYPE. If there
21891 is no valid TYPE provided, it return NULL_TREE. */
21894 ix86_canonical_va_list_type (tree type
)
21898 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type
)))
21899 return ms_va_list_type_node
;
21901 if ((TREE_CODE (type
) == ARRAY_TYPE
21902 && integer_zerop (array_type_nelts (type
)))
21903 || POINTER_TYPE_P (type
))
21905 tree elem_type
= TREE_TYPE (type
);
21906 if (TREE_CODE (elem_type
) == RECORD_TYPE
21907 && lookup_attribute ("sysv_abi va_list",
21908 TYPE_ATTRIBUTES (elem_type
)))
21909 return sysv_va_list_type_node
;
21915 return std_canonical_va_list_type (type
);
21918 /* Iterate through the target-specific builtin types for va_list.
21919 IDX denotes the iterator, *PTREE is set to the result type of
21920 the va_list builtin, and *PNAME to its internal type.
21921 Returns zero if there is no element for this index, otherwise
21922 IDX should be increased upon the next call.
21923 Note, do not iterate a base builtin's name like __builtin_va_list.
21924 Used from c_common_nodes_and_builtins. */
21927 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
21937 *ptree
= ms_va_list_type_node
;
21938 *pname
= "__builtin_ms_va_list";
21942 *ptree
= sysv_va_list_type_node
;
21943 *pname
= "__builtin_sysv_va_list";
/* Scheduler target-hook overrides for the x86 back end.  (Fused source
   line numbers from the mangled extraction removed so the preprocessor
   directives are well-formed again.)  */
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_atom_sched_reorder
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  ix86_dependencies_evaluation_hook
21966 /* Implementation of reassociation_width target hook used by
21967 reassoc phase to identify parallelism level in reassociated
21968 tree. Statements tree_code is passed in OPC. Arguments type
21969 is passed in MODE. */
21972 ix86_reassociation_width (unsigned int op
, machine_mode mode
)
21976 if (VECTOR_MODE_P (mode
))
21979 if (INTEGRAL_MODE_P (mode
))
21980 width
= ix86_cost
->reassoc_vec_int
;
21981 else if (FLOAT_MODE_P (mode
))
21982 width
= ix86_cost
->reassoc_vec_fp
;
21987 /* Integer vector instructions execute in FP unit
21988 and can execute 3 additions and one multiplication per cycle. */
21989 if ((ix86_tune
== PROCESSOR_ZNVER1
|| ix86_tune
== PROCESSOR_ZNVER2
)
21990 && INTEGRAL_MODE_P (mode
) && op
!= PLUS
&& op
!= MINUS
)
21993 /* Account for targets that splits wide vectors into multiple parts. */
21994 if (TARGET_AVX256_SPLIT_REGS
&& GET_MODE_BITSIZE (mode
) > 128)
21995 div
= GET_MODE_BITSIZE (mode
) / 128;
21996 else if (TARGET_SSE_SPLIT_REGS
&& GET_MODE_BITSIZE (mode
) > 64)
21997 div
= GET_MODE_BITSIZE (mode
) / 64;
21998 width
= (width
+ div
- 1) / div
;
22001 else if (INTEGRAL_MODE_P (mode
))
22002 width
= ix86_cost
->reassoc_int
;
22003 else if (FLOAT_MODE_P (mode
))
22004 width
= ix86_cost
->reassoc_fp
;
22006 /* Avoid using too many registers in 32bit mode. */
22007 if (!TARGET_64BIT
&& width
> 2)
22012 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
22013 place emms and femms instructions. */
22015 static machine_mode
22016 ix86_preferred_simd_mode (scalar_mode mode
)
22024 if (TARGET_AVX512BW
&& !TARGET_PREFER_AVX256
)
22026 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
22032 if (TARGET_AVX512BW
&& !TARGET_PREFER_AVX256
)
22034 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
22040 if (TARGET_AVX512F
&& !TARGET_PREFER_AVX256
)
22042 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
22048 if (TARGET_AVX512F
&& !TARGET_PREFER_AVX256
)
22050 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
22056 if (TARGET_AVX512F
&& !TARGET_PREFER_AVX256
)
22058 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
22064 if (TARGET_AVX512F
&& !TARGET_PREFER_AVX256
)
22066 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
22068 else if (TARGET_SSE2
)
22077 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
22078 vectors. If AVX512F is enabled then try vectorizing with 512bit,
22079 256bit and 128bit vectors. */
22081 static unsigned int
22082 ix86_autovectorize_vector_modes (vector_modes
*modes
, bool all
)
22084 if (TARGET_AVX512F
&& !TARGET_PREFER_AVX256
)
22086 modes
->safe_push (V64QImode
);
22087 modes
->safe_push (V32QImode
);
22088 modes
->safe_push (V16QImode
);
22090 else if (TARGET_AVX512F
&& all
)
22092 modes
->safe_push (V32QImode
);
22093 modes
->safe_push (V16QImode
);
22094 modes
->safe_push (V64QImode
);
22096 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
22098 modes
->safe_push (V32QImode
);
22099 modes
->safe_push (V16QImode
);
22101 else if (TARGET_AVX
&& all
)
22103 modes
->safe_push (V16QImode
);
22104 modes
->safe_push (V32QImode
);
22106 else if (TARGET_MMX_WITH_SSE
)
22107 modes
->safe_push (V16QImode
);
22109 if (TARGET_MMX_WITH_SSE
)
22110 modes
->safe_push (V8QImode
);
22115 /* Implemenation of targetm.vectorize.get_mask_mode. */
22117 static opt_machine_mode
22118 ix86_get_mask_mode (machine_mode data_mode
)
22120 unsigned vector_size
= GET_MODE_SIZE (data_mode
);
22121 unsigned nunits
= GET_MODE_NUNITS (data_mode
);
22122 unsigned elem_size
= vector_size
/ nunits
;
22124 /* Scalar mask case. */
22125 if ((TARGET_AVX512F
&& vector_size
== 64)
22126 || (TARGET_AVX512VL
&& (vector_size
== 32 || vector_size
== 16)))
22130 || (TARGET_AVX512BW
&& (elem_size
== 1 || elem_size
== 2)))
22131 return smallest_int_mode_for_size (nunits
);
22134 scalar_int_mode elem_mode
22135 = smallest_int_mode_for_size (elem_size
* BITS_PER_UNIT
);
22137 gcc_assert (elem_size
* nunits
== vector_size
);
22139 return mode_for_vector (elem_mode
, nunits
);
22144 /* Return class of registers which could be used for pseudo of MODE
22145 and of class RCLASS for spilling instead of memory. Return NO_REGS
22146 if it is not possible or non-profitable. */
22148 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
22151 ix86_spill_class (reg_class_t rclass
, machine_mode mode
)
22153 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
22155 && TARGET_INTER_UNIT_MOVES_TO_VEC
22156 && TARGET_INTER_UNIT_MOVES_FROM_VEC
22157 && (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
))
22158 && INTEGER_CLASS_P (rclass
))
22159 return ALL_SSE_REGS
;
22163 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
22164 but returns a lower bound. */
22166 static unsigned int
22167 ix86_max_noce_ifcvt_seq_cost (edge e
)
22169 bool predictable_p
= predictable_edge_p (e
);
22172 if (global_options_set
.x_param_max_rtl_if_conversion_predictable_cost
)
22173 return param_max_rtl_if_conversion_predictable_cost
;
22177 if (global_options_set
.x_param_max_rtl_if_conversion_unpredictable_cost
)
22178 return param_max_rtl_if_conversion_unpredictable_cost
;
22181 return BRANCH_COST (true, predictable_p
) * COSTS_N_INSNS (2);
22184 /* Return true if SEQ is a good candidate as a replacement for the
22185 if-convertible sequence described in IF_INFO. */
22188 ix86_noce_conversion_profitable_p (rtx_insn
*seq
, struct noce_if_info
*if_info
)
22190 if (TARGET_ONE_IF_CONV_INSN
&& if_info
->speed_p
)
22193 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
22194 Maybe we should allow even more conditional moves as long as they
22195 are used far enough not to stall the CPU, or also consider
22196 IF_INFO->TEST_BB succ edge probabilities. */
22197 for (rtx_insn
*insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
22199 rtx set
= single_set (insn
);
22202 if (GET_CODE (SET_SRC (set
)) != IF_THEN_ELSE
)
22204 rtx src
= SET_SRC (set
);
22205 machine_mode mode
= GET_MODE (src
);
22206 if (GET_MODE_CLASS (mode
) != MODE_INT
22207 && GET_MODE_CLASS (mode
) != MODE_FLOAT
)
22209 if ((!REG_P (XEXP (src
, 1)) && !MEM_P (XEXP (src
, 1)))
22210 || (!REG_P (XEXP (src
, 2)) && !MEM_P (XEXP (src
, 2))))
22212 /* insn is CMOV or FCMOV. */
22213 if (++cmov_cnt
> 1)
22217 return default_noce_conversion_profitable_p (seq
, if_info
);
22220 /* Implement targetm.vectorize.init_cost. */
22223 ix86_init_cost (class loop
*)
22225 unsigned *cost
= XNEWVEC (unsigned, 3);
22226 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
22230 /* Implement targetm.vectorize.add_stmt_cost. */
22233 ix86_add_stmt_cost (class vec_info
*vinfo
, void *data
, int count
,
22234 enum vect_cost_for_stmt kind
,
22235 class _stmt_vec_info
*stmt_info
, tree vectype
,
22237 enum vect_cost_model_location where
)
22239 unsigned *cost
= (unsigned *) data
;
22240 unsigned retval
= 0;
22242 = (kind
== scalar_stmt
|| kind
== scalar_load
|| kind
== scalar_store
);
22243 int stmt_cost
= - 1;
22246 machine_mode mode
= scalar_p
? SImode
: TImode
;
22248 if (vectype
!= NULL
)
22250 fp
= FLOAT_TYPE_P (vectype
);
22251 mode
= TYPE_MODE (vectype
);
22253 mode
= TYPE_MODE (TREE_TYPE (vectype
));
22256 if ((kind
== vector_stmt
|| kind
== scalar_stmt
)
22258 && stmt_info
->stmt
&& gimple_code (stmt_info
->stmt
) == GIMPLE_ASSIGN
)
22260 tree_code subcode
= gimple_assign_rhs_code (stmt_info
->stmt
);
22261 /*machine_mode inner_mode = mode;
22262 if (VECTOR_MODE_P (mode))
22263 inner_mode = GET_MODE_INNER (mode);*/
22268 case POINTER_PLUS_EXPR
:
22270 if (kind
== scalar_stmt
)
22272 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22273 stmt_cost
= ix86_cost
->addss
;
22274 else if (X87_FLOAT_MODE_P (mode
))
22275 stmt_cost
= ix86_cost
->fadd
;
22277 stmt_cost
= ix86_cost
->add
;
22280 stmt_cost
= ix86_vec_cost (mode
, fp
? ix86_cost
->addss
22281 : ix86_cost
->sse_op
);
22285 case WIDEN_MULT_EXPR
:
22286 case MULT_HIGHPART_EXPR
:
22287 stmt_cost
= ix86_multiplication_cost (ix86_cost
, mode
);
22290 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22291 stmt_cost
= ix86_cost
->sse_op
;
22292 else if (X87_FLOAT_MODE_P (mode
))
22293 stmt_cost
= ix86_cost
->fchs
;
22294 else if (VECTOR_MODE_P (mode
))
22295 stmt_cost
= ix86_vec_cost (mode
, ix86_cost
->sse_op
);
22297 stmt_cost
= ix86_cost
->add
;
22299 case TRUNC_DIV_EXPR
:
22300 case CEIL_DIV_EXPR
:
22301 case FLOOR_DIV_EXPR
:
22302 case ROUND_DIV_EXPR
:
22303 case TRUNC_MOD_EXPR
:
22304 case CEIL_MOD_EXPR
:
22305 case FLOOR_MOD_EXPR
:
22307 case ROUND_MOD_EXPR
:
22308 case EXACT_DIV_EXPR
:
22309 stmt_cost
= ix86_division_cost (ix86_cost
, mode
);
22317 tree op2
= gimple_assign_rhs2 (stmt_info
->stmt
);
22318 stmt_cost
= ix86_shift_rotate_cost
22320 TREE_CODE (op2
) == INTEGER_CST
,
22321 cst_and_fits_in_hwi (op2
) ? int_cst_value (op2
) : -1,
22322 true, false, false, NULL
, NULL
);
22326 /* Only sign-conversions are free. */
22327 if (tree_nop_conversion_p
22328 (TREE_TYPE (gimple_assign_lhs (stmt_info
->stmt
)),
22329 TREE_TYPE (gimple_assign_rhs1 (stmt_info
->stmt
))))
22341 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
22342 stmt_cost
= ix86_cost
->sse_op
;
22343 else if (VECTOR_MODE_P (mode
))
22344 stmt_cost
= ix86_vec_cost (mode
, ix86_cost
->sse_op
);
22346 stmt_cost
= ix86_cost
->add
;
22354 if ((kind
== vector_stmt
|| kind
== scalar_stmt
)
22357 && (cfn
= gimple_call_combined_fn (stmt_info
->stmt
)) != CFN_LAST
)
22361 stmt_cost
= ix86_vec_cost (mode
,
22362 mode
== SFmode
? ix86_cost
->fmass
22363 : ix86_cost
->fmasd
);
22369 /* If we do elementwise loads into a vector then we are bound by
22370 latency and execution resources for the many scalar loads
22371 (AGU and load ports). Try to account for this by scaling the
22372 construction cost by the number of elements involved. */
22373 if ((kind
== vec_construct
|| kind
== vec_to_scalar
)
22375 && (STMT_VINFO_TYPE (stmt_info
) == load_vec_info_type
22376 || STMT_VINFO_TYPE (stmt_info
) == store_vec_info_type
)
22377 && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) == VMAT_ELEMENTWISE
22378 && TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info
))) != INTEGER_CST
)
22380 stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
22381 stmt_cost
*= (TYPE_VECTOR_SUBPARTS (vectype
) + 1);
22383 if (stmt_cost
== -1)
22384 stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
22386 /* Penalize DFmode vector operations for Bonnell. */
22387 if (TARGET_BONNELL
&& kind
== vector_stmt
22388 && vectype
&& GET_MODE_INNER (TYPE_MODE (vectype
)) == DFmode
)
22389 stmt_cost
*= 5; /* FIXME: The value here is arbitrary. */
22391 /* Statements in an inner loop relative to the loop being
22392 vectorized are weighted more heavily. The value here is
22393 arbitrary and could potentially be improved with analysis. */
22394 if (where
== vect_body
&& stmt_info
22395 && stmt_in_inner_loop_p (vinfo
, stmt_info
))
22396 count
*= 50; /* FIXME. */
22398 retval
= (unsigned) (count
* stmt_cost
);
22400 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
22401 for Silvermont as it has out of order integer pipeline and can execute
22402 2 scalar instruction per tick, but has in order SIMD pipeline. */
22403 if ((TARGET_SILVERMONT
|| TARGET_GOLDMONT
|| TARGET_GOLDMONT_PLUS
22404 || TARGET_TREMONT
|| TARGET_INTEL
) && stmt_info
&& stmt_info
->stmt
)
22406 tree lhs_op
= gimple_get_lhs (stmt_info
->stmt
);
22407 if (lhs_op
&& TREE_CODE (TREE_TYPE (lhs_op
)) == INTEGER_TYPE
)
22408 retval
= (retval
* 17) / 10;
22411 cost
[where
] += retval
;
22416 /* Implement targetm.vectorize.finish_cost. */
22419 ix86_finish_cost (void *data
, unsigned *prologue_cost
,
22420 unsigned *body_cost
, unsigned *epilogue_cost
)
22422 unsigned *cost
= (unsigned *) data
;
22423 *prologue_cost
= cost
[vect_prologue
];
22424 *body_cost
= cost
[vect_body
];
22425 *epilogue_cost
= cost
[vect_epilogue
];
22428 /* Implement targetm.vectorize.destroy_cost_data. */
22431 ix86_destroy_cost_data (void *data
)
22436 /* Validate target specific memory model bits in VAL. */
22438 static unsigned HOST_WIDE_INT
22439 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
22441 enum memmodel model
= memmodel_from_int (val
);
22444 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
22446 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
22448 warning (OPT_Winvalid_memory_model
,
22449 "unknown architecture specific memory model");
22450 return MEMMODEL_SEQ_CST
;
22452 strong
= (is_mm_acq_rel (model
) || is_mm_seq_cst (model
));
22453 if (val
& IX86_HLE_ACQUIRE
&& !(is_mm_acquire (model
) || strong
))
22455 warning (OPT_Winvalid_memory_model
,
22456 "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
22458 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
22460 if (val
& IX86_HLE_RELEASE
&& !(is_mm_release (model
) || strong
))
22462 warning (OPT_Winvalid_memory_model
,
22463 "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
22465 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
22470 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
22471 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
22472 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
22473 or number of vecsize_mangle variants that should be emitted. */
22476 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node
*node
,
22477 struct cgraph_simd_clone
*clonei
,
22478 tree base_type
, int num
)
22482 if (clonei
->simdlen
22483 && (clonei
->simdlen
< 2
22484 || clonei
->simdlen
> 1024
22485 || (clonei
->simdlen
& (clonei
->simdlen
- 1)) != 0))
22487 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
22488 "unsupported simdlen %d", clonei
->simdlen
);
22492 tree ret_type
= TREE_TYPE (TREE_TYPE (node
->decl
));
22493 if (TREE_CODE (ret_type
) != VOID_TYPE
)
22494 switch (TYPE_MODE (ret_type
))
22502 /* case E_SCmode: */
22503 /* case E_DCmode: */
22504 if (!AGGREGATE_TYPE_P (ret_type
))
22508 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
22509 "unsupported return type %qT for simd", ret_type
);
22515 tree type_arg_types
= TYPE_ARG_TYPES (TREE_TYPE (node
->decl
));
22516 bool decl_arg_p
= (node
->definition
|| type_arg_types
== NULL_TREE
);
22518 for (t
= (decl_arg_p
? DECL_ARGUMENTS (node
->decl
) : type_arg_types
), i
= 0;
22519 t
&& t
!= void_list_node
; t
= TREE_CHAIN (t
), i
++)
22521 tree arg_type
= decl_arg_p
? TREE_TYPE (t
) : TREE_VALUE (t
);
22522 switch (TYPE_MODE (arg_type
))
22530 /* case E_SCmode: */
22531 /* case E_DCmode: */
22532 if (!AGGREGATE_TYPE_P (arg_type
))
22536 if (clonei
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_UNIFORM
)
22538 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
22539 "unsupported argument type %qT for simd", arg_type
);
22544 if (!TREE_PUBLIC (node
->decl
))
22546 /* If the function isn't exported, we can pick up just one ISA
22548 if (TARGET_AVX512F
)
22549 clonei
->vecsize_mangle
= 'e';
22550 else if (TARGET_AVX2
)
22551 clonei
->vecsize_mangle
= 'd';
22552 else if (TARGET_AVX
)
22553 clonei
->vecsize_mangle
= 'c';
22555 clonei
->vecsize_mangle
= 'b';
22560 clonei
->vecsize_mangle
= "bcde"[num
];
22563 clonei
->mask_mode
= VOIDmode
;
22564 switch (clonei
->vecsize_mangle
)
22567 clonei
->vecsize_int
= 128;
22568 clonei
->vecsize_float
= 128;
22571 clonei
->vecsize_int
= 128;
22572 clonei
->vecsize_float
= 256;
22575 clonei
->vecsize_int
= 256;
22576 clonei
->vecsize_float
= 256;
22579 clonei
->vecsize_int
= 512;
22580 clonei
->vecsize_float
= 512;
22581 if (TYPE_MODE (base_type
) == QImode
)
22582 clonei
->mask_mode
= DImode
;
22584 clonei
->mask_mode
= SImode
;
22587 if (clonei
->simdlen
== 0)
22589 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type
)))
22590 clonei
->simdlen
= clonei
->vecsize_int
;
22592 clonei
->simdlen
= clonei
->vecsize_float
;
22593 clonei
->simdlen
/= GET_MODE_BITSIZE (TYPE_MODE (base_type
));
22595 else if (clonei
->simdlen
> 16)
22597 /* For compatibility with ICC, use the same upper bounds
22598 for simdlen. In particular, for CTYPE below, use the return type,
22599 unless the function returns void, in that case use the characteristic
22600 type. If it is possible for given SIMDLEN to pass CTYPE value
22601 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
22602 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
22603 emit corresponding clone. */
22604 tree ctype
= ret_type
;
22605 if (TREE_CODE (ret_type
) == VOID_TYPE
)
22607 int cnt
= GET_MODE_BITSIZE (TYPE_MODE (ctype
)) * clonei
->simdlen
;
22608 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype
)))
22609 cnt
/= clonei
->vecsize_int
;
22611 cnt
/= clonei
->vecsize_float
;
22612 if (cnt
> (TARGET_64BIT
? 16 : 8))
22614 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
22615 "unsupported simdlen %d", clonei
->simdlen
);
22622 /* If SIMD clone NODE can't be used in a vectorized loop
22623 in current function, return -1, otherwise return a badness of using it
22624 (0 if it is most desirable from vecsize_mangle point of view, 1
22625 slightly less desirable, etc.). */
22628 ix86_simd_clone_usable (struct cgraph_node
*node
)
22630 switch (node
->simdclone
->vecsize_mangle
)
22637 return TARGET_AVX2
? 2 : 1;
22641 return TARGET_AVX2
? 1 : 0;
22647 if (!TARGET_AVX512F
)
22651 gcc_unreachable ();
22655 /* This function adjusts the unroll factor based on
22656 the hardware capabilities. For ex, bdver3 has
22657 a loop buffer which makes unrolling of smaller
22658 loops less important. This function decides the
22659 unroll factor using number of memory references
22660 (value 32 is used) as a heuristic. */
22663 ix86_loop_unroll_adjust (unsigned nunroll
, class loop
*loop
)
22668 unsigned mem_count
= 0;
22670 if (!TARGET_ADJUST_UNROLL
)
22673 /* Count the number of memory references within the loop body.
22674 This value determines the unrolling factor for bdver3 and bdver4
22676 subrtx_iterator::array_type array
;
22677 bbs
= get_loop_body (loop
);
22678 for (i
= 0; i
< loop
->num_nodes
; i
++)
22679 FOR_BB_INSNS (bbs
[i
], insn
)
22680 if (NONDEBUG_INSN_P (insn
))
22681 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), NONCONST
)
22682 if (const_rtx x
= *iter
)
22685 machine_mode mode
= GET_MODE (x
);
22686 unsigned int n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
22694 if (mem_count
&& mem_count
<=32)
22695 return MIN (nunroll
, 32 / mem_count
);
22701 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
22704 ix86_float_exceptions_rounding_supported_p (void)
22706 /* For x87 floating point with standard excess precision handling,
22707 there is no adddf3 pattern (since x87 floating point only has
22708 XFmode operations) so the default hook implementation gets this
22710 return TARGET_80387
|| (TARGET_SSE
&& TARGET_SSE_MATH
);
22713 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
22716 ix86_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
22718 if (!TARGET_80387
&& !(TARGET_SSE
&& TARGET_SSE_MATH
))
22720 tree exceptions_var
= create_tmp_var_raw (integer_type_node
);
22723 tree fenv_index_type
= build_index_type (size_int (6));
22724 tree fenv_type
= build_array_type (unsigned_type_node
, fenv_index_type
);
22725 tree fenv_var
= create_tmp_var_raw (fenv_type
);
22726 TREE_ADDRESSABLE (fenv_var
) = 1;
22727 tree fenv_ptr
= build_pointer_type (fenv_type
);
22728 tree fenv_addr
= build1 (ADDR_EXPR
, fenv_ptr
, fenv_var
);
22729 fenv_addr
= fold_convert (ptr_type_node
, fenv_addr
);
22730 tree fnstenv
= get_ix86_builtin (IX86_BUILTIN_FNSTENV
);
22731 tree fldenv
= get_ix86_builtin (IX86_BUILTIN_FLDENV
);
22732 tree fnstsw
= get_ix86_builtin (IX86_BUILTIN_FNSTSW
);
22733 tree fnclex
= get_ix86_builtin (IX86_BUILTIN_FNCLEX
);
22734 tree hold_fnstenv
= build_call_expr (fnstenv
, 1, fenv_addr
);
22735 tree hold_fnclex
= build_call_expr (fnclex
, 0);
22736 fenv_var
= build4 (TARGET_EXPR
, fenv_type
, fenv_var
, hold_fnstenv
,
22737 NULL_TREE
, NULL_TREE
);
22738 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, fenv_var
,
22740 *clear
= build_call_expr (fnclex
, 0);
22741 tree sw_var
= create_tmp_var_raw (short_unsigned_type_node
);
22742 tree fnstsw_call
= build_call_expr (fnstsw
, 0);
22743 tree sw_mod
= build4 (TARGET_EXPR
, short_unsigned_type_node
, sw_var
,
22744 fnstsw_call
, NULL_TREE
, NULL_TREE
);
22745 tree exceptions_x87
= fold_convert (integer_type_node
, sw_var
);
22746 tree update_mod
= build4 (TARGET_EXPR
, integer_type_node
,
22747 exceptions_var
, exceptions_x87
,
22748 NULL_TREE
, NULL_TREE
);
22749 *update
= build2 (COMPOUND_EXPR
, integer_type_node
,
22750 sw_mod
, update_mod
);
22751 tree update_fldenv
= build_call_expr (fldenv
, 1, fenv_addr
);
22752 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
, update_fldenv
);
22754 if (TARGET_SSE
&& TARGET_SSE_MATH
)
22756 tree mxcsr_orig_var
= create_tmp_var_raw (unsigned_type_node
);
22757 tree mxcsr_mod_var
= create_tmp_var_raw (unsigned_type_node
);
22758 tree stmxcsr
= get_ix86_builtin (IX86_BUILTIN_STMXCSR
);
22759 tree ldmxcsr
= get_ix86_builtin (IX86_BUILTIN_LDMXCSR
);
22760 tree stmxcsr_hold_call
= build_call_expr (stmxcsr
, 0);
22761 tree hold_assign_orig
= build4 (TARGET_EXPR
, unsigned_type_node
,
22762 mxcsr_orig_var
, stmxcsr_hold_call
,
22763 NULL_TREE
, NULL_TREE
);
22764 tree hold_mod_val
= build2 (BIT_IOR_EXPR
, unsigned_type_node
,
22766 build_int_cst (unsigned_type_node
, 0x1f80));
22767 hold_mod_val
= build2 (BIT_AND_EXPR
, unsigned_type_node
, hold_mod_val
,
22768 build_int_cst (unsigned_type_node
, 0xffffffc0));
22769 tree hold_assign_mod
= build4 (TARGET_EXPR
, unsigned_type_node
,
22770 mxcsr_mod_var
, hold_mod_val
,
22771 NULL_TREE
, NULL_TREE
);
22772 tree ldmxcsr_hold_call
= build_call_expr (ldmxcsr
, 1, mxcsr_mod_var
);
22773 tree hold_all
= build2 (COMPOUND_EXPR
, unsigned_type_node
,
22774 hold_assign_orig
, hold_assign_mod
);
22775 hold_all
= build2 (COMPOUND_EXPR
, void_type_node
, hold_all
,
22776 ldmxcsr_hold_call
);
22778 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, *hold
, hold_all
);
22781 tree ldmxcsr_clear_call
= build_call_expr (ldmxcsr
, 1, mxcsr_mod_var
);
22783 *clear
= build2 (COMPOUND_EXPR
, void_type_node
, *clear
,
22784 ldmxcsr_clear_call
);
22786 *clear
= ldmxcsr_clear_call
;
22787 tree stxmcsr_update_call
= build_call_expr (stmxcsr
, 0);
22788 tree exceptions_sse
= fold_convert (integer_type_node
,
22789 stxmcsr_update_call
);
22792 tree exceptions_mod
= build2 (BIT_IOR_EXPR
, integer_type_node
,
22793 exceptions_var
, exceptions_sse
);
22794 tree exceptions_assign
= build2 (MODIFY_EXPR
, integer_type_node
,
22795 exceptions_var
, exceptions_mod
);
22796 *update
= build2 (COMPOUND_EXPR
, integer_type_node
, *update
,
22797 exceptions_assign
);
22800 *update
= build4 (TARGET_EXPR
, integer_type_node
, exceptions_var
,
22801 exceptions_sse
, NULL_TREE
, NULL_TREE
);
22802 tree ldmxcsr_update_call
= build_call_expr (ldmxcsr
, 1, mxcsr_orig_var
);
22803 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
,
22804 ldmxcsr_update_call
);
22806 tree atomic_feraiseexcept
22807 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT
);
22808 tree atomic_feraiseexcept_call
= build_call_expr (atomic_feraiseexcept
,
22809 1, exceptions_var
);
22810 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
,
22811 atomic_feraiseexcept_call
);
22814 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
22815 /* For i386, common symbol is local only for non-PIE binaries. For
22816 x86-64, common symbol is local only for non-PIE binaries or linker
22817 supports copy reloc in PIE binaries. */
22820 ix86_binds_local_p (const_tree exp
)
22822 return default_binds_local_p_3 (exp
, flag_shlib
!= 0, true, true,
22825 && HAVE_LD_PIE_COPYRELOC
!= 0)));
22829 /* If MEM is in the form of [base+offset], extract the two parts
22830 of address and set to BASE and OFFSET, otherwise return false. */
22833 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
22837 gcc_assert (MEM_P (mem
));
22839 addr
= XEXP (mem
, 0);
22841 if (GET_CODE (addr
) == CONST
)
22842 addr
= XEXP (addr
, 0);
22844 if (REG_P (addr
) || GET_CODE (addr
) == SYMBOL_REF
)
22847 *offset
= const0_rtx
;
22851 if (GET_CODE (addr
) == PLUS
22852 && (REG_P (XEXP (addr
, 0))
22853 || GET_CODE (XEXP (addr
, 0)) == SYMBOL_REF
)
22854 && CONST_INT_P (XEXP (addr
, 1)))
22856 *base
= XEXP (addr
, 0);
22857 *offset
= XEXP (addr
, 1);
22864 /* Given OPERANDS of consecutive load/store, check if we can merge
22865 them into move multiple. LOAD is true if they are load instructions.
22866 MODE is the mode of memory operands. */
22869 ix86_operands_ok_for_move_multiple (rtx
*operands
, bool load
,
22872 HOST_WIDE_INT offval_1
, offval_2
, msize
;
22873 rtx mem_1
, mem_2
, reg_1
, reg_2
, base_1
, base_2
, offset_1
, offset_2
;
22877 mem_1
= operands
[1];
22878 mem_2
= operands
[3];
22879 reg_1
= operands
[0];
22880 reg_2
= operands
[2];
22884 mem_1
= operands
[0];
22885 mem_2
= operands
[2];
22886 reg_1
= operands
[1];
22887 reg_2
= operands
[3];
22890 gcc_assert (REG_P (reg_1
) && REG_P (reg_2
));
22892 if (REGNO (reg_1
) != REGNO (reg_2
))
22895 /* Check if the addresses are in the form of [base+offset]. */
22896 if (!extract_base_offset_in_addr (mem_1
, &base_1
, &offset_1
))
22898 if (!extract_base_offset_in_addr (mem_2
, &base_2
, &offset_2
))
22901 /* Check if the bases are the same. */
22902 if (!rtx_equal_p (base_1
, base_2
))
22905 offval_1
= INTVAL (offset_1
);
22906 offval_2
= INTVAL (offset_2
);
22907 msize
= GET_MODE_SIZE (mode
);
22908 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
22909 if (offval_1
+ msize
!= offval_2
)
22915 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
22918 ix86_optab_supported_p (int op
, machine_mode mode1
, machine_mode
,
22919 optimization_type opt_type
)
22933 return opt_type
== OPTIMIZE_FOR_SPEED
;
22936 if (SSE_FLOAT_MODE_P (mode1
)
22938 && !flag_trapping_math
22940 return opt_type
== OPTIMIZE_FOR_SPEED
;
22946 if (SSE_FLOAT_MODE_P (mode1
)
22948 && !flag_trapping_math
22951 return opt_type
== OPTIMIZE_FOR_SPEED
;
22954 return opt_type
== OPTIMIZE_FOR_SPEED
&& use_rsqrt_p ();
22961 /* Address space support.
22963 This is not "far pointers" in the 16-bit sense, but an easy way
22964 to use %fs and %gs segment prefixes. Therefore:
22966 (a) All address spaces have the same modes,
22967 (b) All address spaces have the same addresss forms,
22968 (c) While %fs and %gs are technically subsets of the generic
22969 address space, they are probably not subsets of each other.
22970 (d) Since we have no access to the segment base register values
22971 without resorting to a system call, we cannot convert a
22972 non-default address space to a default address space.
22973 Therefore we do not claim %fs or %gs are subsets of generic.
22975 Therefore we can (mostly) use the default hooks. */
22977 /* All use of segmentation is assumed to make address 0 valid. */
22980 ix86_addr_space_zero_address_valid (addr_space_t as
)
22982 return as
!= ADDR_SPACE_GENERIC
;
22986 ix86_init_libfuncs (void)
22990 set_optab_libfunc (sdivmod_optab
, TImode
, "__divmodti4");
22991 set_optab_libfunc (udivmod_optab
, TImode
, "__udivmodti4");
22995 set_optab_libfunc (sdivmod_optab
, DImode
, "__divmoddi4");
22996 set_optab_libfunc (udivmod_optab
, DImode
, "__udivmoddi4");
23000 darwin_rename_builtins ();
23004 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
23005 FPU, assume that the fpcw is set to extended precision; when using
23006 only SSE, rounding is correct; when using both SSE and the FPU,
23007 the rounding precision is indeterminate, since either may be chosen
23008 apparently at random. */
23010 static enum flt_eval_method
23011 ix86_excess_precision (enum excess_precision_type type
)
23015 case EXCESS_PRECISION_TYPE_FAST
:
23016 /* The fastest type to promote to will always be the native type,
23017 whether that occurs with implicit excess precision or
23019 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
;
23020 case EXCESS_PRECISION_TYPE_STANDARD
:
23021 case EXCESS_PRECISION_TYPE_IMPLICIT
:
23022 /* Otherwise, the excess precision we want when we are
23023 in a standards compliant mode, and the implicit precision we
23024 provide would be identical were it not for the unpredictable
23027 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
;
23028 else if (!TARGET_MIX_SSE_I387
)
23030 if (!(TARGET_SSE
&& TARGET_SSE_MATH
))
23031 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE
;
23032 else if (TARGET_SSE2
)
23033 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
;
23036 /* If we are in standards compliant mode, but we know we will
23037 calculate in unpredictable precision, return
23038 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
23039 excess precision if the target can't guarantee it will honor
23041 return (type
== EXCESS_PRECISION_TYPE_STANDARD
23042 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
23043 : FLT_EVAL_METHOD_UNPREDICTABLE
);
23045 gcc_unreachable ();
23048 return FLT_EVAL_METHOD_UNPREDICTABLE
;
23051 /* Implement PUSH_ROUNDING. On 386, we have pushw instruction that
23052 decrements by exactly 2 no matter what the position was, there is no pushb.
23054 But as CIE data alignment factor on this arch is -4 for 32bit targets
23055 and -8 for 64bit targets, we need to make sure all stack pointer adjustments
23056 are in multiple of 4 for 32bit targets and 8 for 64bit targets. */
23059 ix86_push_rounding (poly_int64 bytes
)
23061 return ROUND_UP (bytes
, UNITS_PER_WORD
);
23064 /* Target-specific selftests. */
23068 namespace selftest
{
23070 /* Verify that hard regs are dumped as expected (in compact mode). */
23073 ix86_test_dumping_hard_regs ()
23075 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode
, 0));
23076 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode
, 1));
23079 /* Test dumping an insn with repeated references to the same SCRATCH,
23080 to verify the rtx_reuse code. */
23083 ix86_test_dumping_memory_blockage ()
23085 set_new_first_and_last_insn (NULL
, NULL
);
23087 rtx pat
= gen_memory_blockage ();
23088 rtx_reuse_manager r
;
23089 r
.preprocess (pat
);
23091 /* Verify that the repeated references to the SCRATCH show use
23092 reuse IDS. The first should be prefixed with a reuse ID,
23093 and the second should be dumped as a "reuse_rtx" of that ID.
23094 The expected string assumes Pmode == DImode. */
23095 if (Pmode
== DImode
)
23096 ASSERT_RTL_DUMP_EQ_WITH_REUSE
23097 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
23099 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
23100 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat
, &r
);
23103 /* Verify loading an RTL dump; specifically a dump of copying
23104 a param on x86_64 from a hard reg into the frame.
23105 This test is target-specific since the dump contains target-specific
23109 ix86_test_loading_dump_fragment_1 ()
23111 rtl_dump_test
t (SELFTEST_LOCATION
,
23112 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
23114 rtx_insn
*insn
= get_insn_by_uid (1);
23116 /* The block structure and indentation here is purely for
23117 readability; it mirrors the structure of the rtx. */
23120 rtx pat
= PATTERN (insn
);
23121 ASSERT_EQ (SET
, GET_CODE (pat
));
23123 rtx dest
= SET_DEST (pat
);
23124 ASSERT_EQ (MEM
, GET_CODE (dest
));
23125 /* Verify the "/c" was parsed. */
23126 ASSERT_TRUE (RTX_FLAG (dest
, call
));
23127 ASSERT_EQ (SImode
, GET_MODE (dest
));
23129 rtx addr
= XEXP (dest
, 0);
23130 ASSERT_EQ (PLUS
, GET_CODE (addr
));
23131 ASSERT_EQ (DImode
, GET_MODE (addr
));
23133 rtx lhs
= XEXP (addr
, 0);
23134 /* Verify that the "frame" REG was consolidated. */
23135 ASSERT_RTX_PTR_EQ (frame_pointer_rtx
, lhs
);
23138 rtx rhs
= XEXP (addr
, 1);
23139 ASSERT_EQ (CONST_INT
, GET_CODE (rhs
));
23140 ASSERT_EQ (-4, INTVAL (rhs
));
23143 /* Verify the "[1 i+0 S4 A32]" was parsed. */
23144 ASSERT_EQ (1, MEM_ALIAS_SET (dest
));
23145 /* "i" should have been handled by synthesizing a global int
23146 variable named "i". */
23147 mem_expr
= MEM_EXPR (dest
);
23148 ASSERT_NE (mem_expr
, NULL
);
23149 ASSERT_EQ (VAR_DECL
, TREE_CODE (mem_expr
));
23150 ASSERT_EQ (integer_type_node
, TREE_TYPE (mem_expr
));
23151 ASSERT_EQ (IDENTIFIER_NODE
, TREE_CODE (DECL_NAME (mem_expr
)));
23152 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr
)));
23154 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest
));
23155 ASSERT_EQ (0, MEM_OFFSET (dest
));
23157 ASSERT_EQ (4, MEM_SIZE (dest
));
23159 ASSERT_EQ (32, MEM_ALIGN (dest
));
23162 rtx src
= SET_SRC (pat
);
23163 ASSERT_EQ (REG
, GET_CODE (src
));
23164 ASSERT_EQ (SImode
, GET_MODE (src
));
23165 ASSERT_EQ (5, REGNO (src
));
23166 tree reg_expr
= REG_EXPR (src
);
23167 /* "i" here should point to the same var as for the MEM_EXPR. */
23168 ASSERT_EQ (reg_expr
, mem_expr
);
23173 /* Verify that the RTL loader copes with a call_insn dump.
23174 This test is target-specific since the dump contains a target-specific
23178 ix86_test_loading_call_insn ()
23180 /* The test dump includes register "xmm0", where requires TARGET_SSE
23185 rtl_dump_test
t (SELFTEST_LOCATION
, locate_file ("x86_64/call-insn.rtl"));
23187 rtx_insn
*insn
= get_insns ();
23188 ASSERT_EQ (CALL_INSN
, GET_CODE (insn
));
23191 ASSERT_TRUE (RTX_FLAG (insn
, jump
));
23193 rtx pat
= PATTERN (insn
);
23194 ASSERT_EQ (CALL
, GET_CODE (SET_SRC (pat
)));
23196 /* Verify REG_NOTES. */
23198 /* "(expr_list:REG_CALL_DECL". */
23199 ASSERT_EQ (EXPR_LIST
, GET_CODE (REG_NOTES (insn
)));
23200 rtx_expr_list
*note0
= as_a
<rtx_expr_list
*> (REG_NOTES (insn
));
23201 ASSERT_EQ (REG_CALL_DECL
, REG_NOTE_KIND (note0
));
23203 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
23204 rtx_expr_list
*note1
= note0
->next ();
23205 ASSERT_EQ (REG_EH_REGION
, REG_NOTE_KIND (note1
));
23207 ASSERT_EQ (NULL
, note1
->next ());
23210 /* Verify CALL_INSN_FUNCTION_USAGE. */
23212 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
23213 rtx_expr_list
*usage
23214 = as_a
<rtx_expr_list
*> (CALL_INSN_FUNCTION_USAGE (insn
));
23215 ASSERT_EQ (EXPR_LIST
, GET_CODE (usage
));
23216 ASSERT_EQ (DFmode
, GET_MODE (usage
));
23217 ASSERT_EQ (USE
, GET_CODE (usage
->element ()));
23218 ASSERT_EQ (NULL
, usage
->next ());
23222 /* Verify that the RTL loader copes a dump from print_rtx_function.
23223 This test is target-specific since the dump contains target-specific
23227 ix86_test_loading_full_dump ()
23229 rtl_dump_test
t (SELFTEST_LOCATION
, locate_file ("x86_64/times-two.rtl"));
23231 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun
->decl
)));
23233 rtx_insn
*insn_1
= get_insn_by_uid (1);
23234 ASSERT_EQ (NOTE
, GET_CODE (insn_1
));
23236 rtx_insn
*insn_7
= get_insn_by_uid (7);
23237 ASSERT_EQ (INSN
, GET_CODE (insn_7
));
23238 ASSERT_EQ (PARALLEL
, GET_CODE (PATTERN (insn_7
)));
23240 rtx_insn
*insn_15
= get_insn_by_uid (15);
23241 ASSERT_EQ (INSN
, GET_CODE (insn_15
));
23242 ASSERT_EQ (USE
, GET_CODE (PATTERN (insn_15
)));
23244 /* Verify crtl->return_rtx. */
23245 ASSERT_EQ (REG
, GET_CODE (crtl
->return_rtx
));
23246 ASSERT_EQ (0, REGNO (crtl
->return_rtx
));
23247 ASSERT_EQ (SImode
, GET_MODE (crtl
->return_rtx
));
23250 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
23251 In particular, verify that it correctly loads the 2nd operand.
23252 This test is target-specific since these are machine-specific
23253 operands (and enums). */
23256 ix86_test_loading_unspec ()
23258 rtl_dump_test
t (SELFTEST_LOCATION
, locate_file ("x86_64/unspec.rtl"));
23260 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun
->decl
)));
23262 ASSERT_TRUE (cfun
);
23264 /* Test of an UNSPEC. */
23265 rtx_insn
*insn
= get_insns ();
23266 ASSERT_EQ (INSN
, GET_CODE (insn
));
23267 rtx set
= single_set (insn
);
23268 ASSERT_NE (NULL
, set
);
23269 rtx dst
= SET_DEST (set
);
23270 ASSERT_EQ (MEM
, GET_CODE (dst
));
23271 rtx src
= SET_SRC (set
);
23272 ASSERT_EQ (UNSPEC
, GET_CODE (src
));
23273 ASSERT_EQ (BLKmode
, GET_MODE (src
));
23274 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE
, XINT (src
, 1));
23276 rtx v0
= XVECEXP (src
, 0, 0);
23278 /* Verify that the two uses of the first SCRATCH have pointer
23280 rtx scratch_a
= XEXP (dst
, 0);
23281 ASSERT_EQ (SCRATCH
, GET_CODE (scratch_a
));
23283 rtx scratch_b
= XEXP (v0
, 0);
23284 ASSERT_EQ (SCRATCH
, GET_CODE (scratch_b
));
23286 ASSERT_EQ (scratch_a
, scratch_b
);
23288 /* Verify that the two mems are thus treated as equal. */
23289 ASSERT_TRUE (rtx_equal_p (dst
, v0
));
23291 /* Verify that the insn is recognized. */
23292 ASSERT_NE(-1, recog_memoized (insn
));
23294 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
23295 insn
= NEXT_INSN (insn
);
23296 ASSERT_EQ (INSN
, GET_CODE (insn
));
23298 set
= single_set (insn
);
23299 ASSERT_NE (NULL
, set
);
23301 src
= SET_SRC (set
);
23302 ASSERT_EQ (UNSPEC_VOLATILE
, GET_CODE (src
));
23303 ASSERT_EQ (UNSPECV_RDTSCP
, XINT (src
, 1));
23306 /* Run all target-specific selftests. */
23309 ix86_run_selftests (void)
23311 ix86_test_dumping_hard_regs ();
23312 ix86_test_dumping_memory_blockage ();
23314 /* Various tests of loading RTL dumps, here because they contain
23315 ix86-isms (e.g. names of hard regs). */
23316 ix86_test_loading_dump_fragment_1 ();
23317 ix86_test_loading_call_insn ();
23318 ix86_test_loading_full_dump ();
23319 ix86_test_loading_unspec ();
23322 } // namespace selftest
23324 #endif /* CHECKING_P */
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_VECTORIZE_BUILTIN_SCATTER
#define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
  ix86_print_patchable_function_entry

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead
#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_MEMMODEL_CHECK
#define TARGET_MEMMODEL_CHECK ix86_memmodel_check

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#else
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P ix86_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM FLAGS_REG
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION ix86_excess_precision
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN ix86_warn_func_return

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_ZERO_CALL_USED_REGS
#define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_SPLIT_REDUCTION
#define TARGET_VECTORIZE_SPLIT_REDUCTION \
  ix86_split_reduction
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
  ix86_autovectorize_vector_modes
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable

#undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
#define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS ix86_init_libfuncs

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc

#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost

#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  ix86_hard_regno_call_part_clobbered

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class

#undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
#define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment

#undef TARGET_STATIC_RTX_ALIGNMENT
#define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment

#undef TARGET_EMPTY_RECORD_P
#define TARGET_EMPTY_RECORD_P ix86_is_empty_record

#undef TARGET_WARN_PARAMETER_PASSING_ABI
#define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi

#undef TARGET_GET_MULTILIB_ABI_NAME
#define TARGET_GET_MULTILIB_ABI_NAME \
  ix86_get_multilib_abi_name
23844 static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED
)
23846 #ifdef OPTION_GLIBC
23848 return (built_in_function
)fcode
== BUILT_IN_MEMPCPY
;
23856 #undef TARGET_LIBC_HAS_FAST_FUNCTION
23857 #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
23860 #undef TARGET_RUN_TARGET_SELFTESTS
23861 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
23862 #endif /* #if CHECKING_P */
23864 struct gcc_target targetm
= TARGET_INITIALIZER
;
23866 #include "gt-i386.h"