i386: Enable _BitInt on x86-64 [PR102989]
[official-gcc.git] / gcc / config / i386 / i386.cc
blob 1cef7ee8f1a38dacc79ace60e06f9602bf7e1c2c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2023 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #define IN_TARGET_CODE 1
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "memmodel.h"
29 #include "gimple.h"
30 #include "cfghooks.h"
31 #include "cfgloop.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic.h"
42 #include "cfgbuild.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "attribs.h"
46 #include "calls.h"
47 #include "stor-layout.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "except.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "cfgrtl.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "gimplify.h"
60 #include "dwarf2.h"
61 #include "tm-constrs.h"
62 #include "cselib.h"
63 #include "sched-int.h"
64 #include "opts.h"
65 #include "tree-pass.h"
66 #include "context.h"
67 #include "pass_manager.h"
68 #include "target-globals.h"
69 #include "gimple-iterator.h"
70 #include "gimple-fold.h"
71 #include "tree-vectorizer.h"
72 #include "shrink-wrap.h"
73 #include "builtins.h"
74 #include "rtl-iter.h"
75 #include "tree-iterator.h"
76 #include "dbgcnt.h"
77 #include "case-cfn-macros.h"
78 #include "dojump.h"
79 #include "fold-const-call.h"
80 #include "tree-vrp.h"
81 #include "tree-ssanames.h"
82 #include "selftest.h"
83 #include "selftest-rtl.h"
84 #include "print-rtl.h"
85 #include "intl.h"
86 #include "ifcvt.h"
87 #include "symbol-summary.h"
88 #include "ipa-prop.h"
89 #include "ipa-fnsummary.h"
90 #include "wide-int-bitmask.h"
91 #include "tree-vector-builder.h"
92 #include "debug.h"
93 #include "dwarf2out.h"
94 #include "i386-options.h"
95 #include "i386-builtins.h"
96 #include "i386-expand.h"
97 #include "i386-features.h"
98 #include "function-abi.h"
99 #include "rtl-error.h"
101 /* This file should be included last. */
102 #include "target-def.h"
104 static rtx legitimize_dllimport_symbol (rtx, bool);
105 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
106 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
107 static void ix86_emit_restore_reg_using_pop (rtx);
110 #ifndef CHECK_STACK_LIMIT
111 #define CHECK_STACK_LIMIT (-1)
112 #endif
114 /* Return index of given mode in mult and division cost tables. */
115 #define MODE_INDEX(mode) \
116 ((mode) == QImode ? 0 \
117 : (mode) == HImode ? 1 \
118 : (mode) == SImode ? 2 \
119 : (mode) == DImode ? 3 \
120 : 4)
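/* A minimal usage sketch (the field name below is illustrative, not a
   quotation of the cost structure): a lookup such as

     unsigned cost = ix86_cost->mult_init[MODE_INDEX (mode)];

   selects slot 0 for QImode, 1 for HImode, 2 for SImode, 3 for DImode and
   the catch-all slot 4 for any wider or non-integer mode.  */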
123 /* Set by -mtune. */
124 const struct processor_costs *ix86_tune_cost = NULL;
126 /* Set by -mtune or -Os. */
127 const struct processor_costs *ix86_cost = NULL;
129 /* In case the average insn count for single function invocation is
130 lower than this constant, emit fast (but longer) prologue and
131 epilogue code. */
132 #define FAST_PROLOGUE_INSN_COUNT 20
134 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
135 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
136 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
137 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
139 /* Array of the smallest class containing reg number REGNO, indexed by
140 REGNO. Used by REGNO_REG_CLASS in i386.h. */
142 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
144 /* ax, dx, cx, bx */
145 AREG, DREG, CREG, BREG,
146 /* si, di, bp, sp */
147 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
148 /* FP registers */
149 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
150 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
151 /* arg pointer, flags, fpsr, frame */
152 NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
153 /* SSE registers */
154 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
155 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
156 /* MMX registers */
157 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
158 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
159 /* REX registers */
160 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
161 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
162 /* SSE REX registers */
163 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
164 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
165 /* AVX-512 SSE registers */
166 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
167 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
168 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
169 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
170 /* Mask registers. */
171 ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
172 MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS
175 /* The "default" register map used in 32bit mode. */
177 int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
179 /* general regs */
180 0, 2, 1, 3, 6, 7, 4, 5,
181 /* fp regs */
182 12, 13, 14, 15, 16, 17, 18, 19,
183 /* arg, flags, fpsr, frame */
184 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
185 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
186 /* SSE */
187 21, 22, 23, 24, 25, 26, 27, 28,
188 /* MMX */
189 29, 30, 31, 32, 33, 34, 35, 36,
190 /* extended integer registers */
191 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
192 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
193 /* extended sse registers */
194 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
195 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
196 /* AVX-512 registers 16-23 */
197 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
198 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
199 /* AVX-512 registers 24-31 */
200 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
201 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
202 /* Mask registers */
203 93, 94, 95, 96, 97, 98, 99, 100
206 /* The "default" register map used in 64bit mode. */
208 int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
210 /* general regs */
211 0, 1, 2, 3, 4, 5, 6, 7,
212 /* fp regs */
213 33, 34, 35, 36, 37, 38, 39, 40,
214 /* arg, flags, fpsr, frame */
215 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
216 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
217 /* SSE */
218 17, 18, 19, 20, 21, 22, 23, 24,
219 /* MMX */
220 41, 42, 43, 44, 45, 46, 47, 48,
221 /* extended integer registers */
222 8, 9, 10, 11, 12, 13, 14, 15,
223 /* extended SSE registers */
224 25, 26, 27, 28, 29, 30, 31, 32,
225 /* AVX-512 registers 16-23 */
226 67, 68, 69, 70, 71, 72, 73, 74,
227 /* AVX-512 registers 24-31 */
228 75, 76, 77, 78, 79, 80, 81, 82,
229 /* Mask registers */
230 118, 119, 120, 121, 122, 123, 124, 125
233 /* Define the register numbers to be used in Dwarf debugging information.
234 The SVR4 reference port C compiler uses the following register numbers
235 in its Dwarf output code:
236 0 for %eax (gcc regno = 0)
237 1 for %ecx (gcc regno = 2)
238 2 for %edx (gcc regno = 1)
239 3 for %ebx (gcc regno = 3)
240 4 for %esp (gcc regno = 7)
241 5 for %ebp (gcc regno = 6)
242 6 for %esi (gcc regno = 4)
243 7 for %edi (gcc regno = 5)
244 The following three DWARF register numbers are never generated by
245 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
246 believed these numbers have these meanings.
247 8 for %eip (no gcc equivalent)
248 9 for %eflags (gcc regno = 17)
249 10 for %trapno (no gcc equivalent)
250 It is not at all clear how we should number the FP stack registers
251 for the x86 architecture. If the version of SDB on x86/svr4 were
252 a bit less brain dead with respect to floating-point then we would
253 have a precedent to follow with respect to DWARF register numbers
254 for x86 FP registers, but the SDB on x86/svr4 was so completely
255 broken with respect to FP registers that it is hardly worth thinking
256 of it as something to strive for compatibility with.
257 The version of x86/svr4 SDB I had does (partially)
258 seem to believe that DWARF register number 11 is associated with
259 the x86 register %st(0), but that's about all. Higher DWARF
260 register numbers don't seem to be associated with anything in
261 particular, and even for DWARF regno 11, SDB only seemed to under-
262 stand that it should say that a variable lives in %st(0) (when
263 asked via an `=' command) if we said it was in DWARF regno 11,
264 but SDB still printed garbage when asked for the value of the
265 variable in question (via a `/' command).
266 (Also note that the labels SDB printed for various FP stack regs
267 when doing an `x' command were all wrong.)
268 Note that these problems generally don't affect the native SVR4
269 C compiler because it doesn't allow the use of -O with -g and
270 because when it is *not* optimizing, it allocates a memory
271 location for each floating-point variable, and the memory
272 location is what gets described in the DWARF AT_location
273 attribute for the variable in question.
274 Regardless of the severe mental illness of the x86/svr4 SDB, we
275 do something sensible here and we use the following DWARF
276 register numbers. Note that these are all stack-top-relative
277 numbers.
278 11 for %st(0) (gcc regno = 8)
279 12 for %st(1) (gcc regno = 9)
280 13 for %st(2) (gcc regno = 10)
281 14 for %st(3) (gcc regno = 11)
282 15 for %st(4) (gcc regno = 12)
283 16 for %st(5) (gcc regno = 13)
284 17 for %st(6) (gcc regno = 14)
285 18 for %st(7) (gcc regno = 15)
287 int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
289 /* general regs */
290 0, 2, 1, 3, 6, 7, 5, 4,
291 /* fp regs */
292 11, 12, 13, 14, 15, 16, 17, 18,
293 /* arg, flags, fpsr, frame */
294 IGNORED_DWARF_REGNUM, 9,
295 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
296 /* SSE registers */
297 21, 22, 23, 24, 25, 26, 27, 28,
298 /* MMX registers */
299 29, 30, 31, 32, 33, 34, 35, 36,
300 /* extended integer registers */
301 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
302 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
303 /* extended sse registers */
304 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
305 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
306 /* AVX-512 registers 16-23 */
307 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
308 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
309 /* AVX-512 registers 24-31 */
310 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
311 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
312 /* Mask registers */
313 93, 94, 95, 96, 97, 98, 99, 100
316 /* Define parameter passing and return registers. */
318 static int const x86_64_int_parameter_registers[6] =
320 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
323 static int const x86_64_ms_abi_int_parameter_registers[4] =
325 CX_REG, DX_REG, R8_REG, R9_REG
328 static int const x86_64_int_return_registers[4] =
330 AX_REG, DX_REG, DI_REG, SI_REG
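/* A brief illustration of the SysV tables above (hypothetical user code):
   for a call to

     long f (long a, long b, long c);

   the arguments arrive in %rdi, %rsi and %rdx (DI_REG, SI_REG, DX_REG in
   that order) and the result comes back in %rax (AX_REG); a 128-bit integer
   result would use the %rax:%rdx pair.  The MS ABI table instead starts
   with %rcx, %rdx, %r8, %r9 and has no %rsi/%rdi slots.  */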
333 /* Define the structure for the machine field in struct function. */
335 struct GTY(()) stack_local_entry {
336 unsigned short mode;
337 unsigned short n;
338 rtx rtl;
339 struct stack_local_entry *next;
342 /* Which cpu are we scheduling for. */
343 enum attr_cpu ix86_schedule;
345 /* Which cpu are we optimizing for. */
346 enum processor_type ix86_tune;
348 /* Which instruction set architecture to use. */
349 enum processor_type ix86_arch;
351 /* True if processor has SSE prefetch instruction. */
352 unsigned char ix86_prefetch_sse;
354 /* Preferred alignment for stack boundary in bits. */
355 unsigned int ix86_preferred_stack_boundary;
357 /* Alignment for incoming stack boundary in bits specified at
358 command line. */
359 unsigned int ix86_user_incoming_stack_boundary;
361 /* Default alignment for incoming stack boundary in bits. */
362 unsigned int ix86_default_incoming_stack_boundary;
364 /* Alignment for incoming stack boundary in bits. */
365 unsigned int ix86_incoming_stack_boundary;
367 /* True if there is no direct access to extern symbols. */
368 bool ix86_has_no_direct_extern_access;
370 /* Calling abi specific va_list type nodes. */
371 tree sysv_va_list_type_node;
372 tree ms_va_list_type_node;
374 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
375 char internal_label_prefix[16];
376 int internal_label_prefix_len;
378 /* Fence to use after loop using movnt. */
379 tree x86_mfence;
381 /* Register class used for passing a given 64-bit part of the argument.
382 These represent classes as documented by the psABI, with the exception
383 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
384 just uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
386 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
387 whenever possible (the upper half does contain padding).  */
388 enum x86_64_reg_class
390 X86_64_NO_CLASS,
391 X86_64_INTEGER_CLASS,
392 X86_64_INTEGERSI_CLASS,
393 X86_64_SSE_CLASS,
394 X86_64_SSEHF_CLASS,
395 X86_64_SSESF_CLASS,
396 X86_64_SSEDF_CLASS,
397 X86_64_SSEUP_CLASS,
398 X86_64_X87_CLASS,
399 X86_64_X87UP_CLASS,
400 X86_64_COMPLEX_X87_CLASS,
401 X86_64_MEMORY_CLASS
404 #define MAX_CLASSES 8
406 /* Table of constants used by fldpi, fldln2, etc.... */
407 static REAL_VALUE_TYPE ext_80387_constants_table [5];
408 static bool ext_80387_constants_init;
411 static rtx ix86_function_value (const_tree, const_tree, bool);
412 static bool ix86_function_value_regno_p (const unsigned int);
413 static unsigned int ix86_function_arg_boundary (machine_mode,
414 const_tree);
415 static rtx ix86_static_chain (const_tree, bool);
416 static int ix86_function_regparm (const_tree, const_tree);
417 static void ix86_compute_frame_layout (void);
418 static tree ix86_canonical_va_list_type (tree);
419 static unsigned int split_stack_prologue_scratch_regno (void);
420 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
422 static bool ix86_can_inline_p (tree, tree);
423 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
426 /* Whether -mtune= or -march= were specified */
427 int ix86_tune_defaulted;
428 int ix86_arch_specified;
430 /* Return true if a red zone is in use.  We can't use the red zone when
431 there are local indirect jumps, like "indirect_jump" or "tablejump",
432 which jump to another place in the function, since "call" in the
433 indirect thunk pushes the return address onto the stack, destroying
434 the red zone.
436 TODO: If we can reserve the first 2 WORDs in the red zone, one for PUSH
437 and another for CALL, we can allow local indirect jumps with an
438 indirect thunk.  */
440 bool
441 ix86_using_red_zone (void)
443 return (TARGET_RED_ZONE
444 && !TARGET_64BIT_MS_ABI
445 && (!cfun->machine->has_local_indirect_jump
446 || cfun->machine->indirect_branch_type == indirect_branch_keep));
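/* Background for the check above: in the SysV x86-64 ABI the 128 bytes just
   below %rsp form the red zone, which a leaf function may use without
   adjusting the stack pointer, e.g. (illustrative assembly)

     movl %edi, -4(%rsp)    # spill below %rsp, no sub/add of %rsp needed

   A "call" issued inside an indirect thunk pushes a return address into
   exactly that area, which is why indirect-branch conversion disables the
   red zone here.  */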
449 /* Return true if profiling code should be emitted before the
450 prologue, false otherwise.
451 Note: for x86 with the "hotfix" (hot-patch) prologue this is not supported.  */
452 static bool
453 ix86_profile_before_prologue (void)
455 return flag_fentry != 0;
458 /* Update register usage after having seen the compiler flags. */
460 static void
461 ix86_conditional_register_usage (void)
463 int i, c_mask;
465 /* If there are no caller-saved registers, preserve all registers,
466 except fixed_regs and registers used for the function return value,
467 since aggregate_value_p checks call_used_regs[regno] on the return
468 value.  */
469 if (cfun && cfun->machine->no_caller_saved_registers)
470 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
471 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
472 call_used_regs[i] = 0;
474 /* For 32-bit targets, disable the REX registers. */
475 if (! TARGET_64BIT)
477 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
478 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
479 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
480 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
481 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
482 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
485 /* See the definition of CALL_USED_REGISTERS in i386.h. */
486 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
488 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
490 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
492 /* Set/reset conditionally defined registers from
493 CALL_USED_REGISTERS initializer. */
494 if (call_used_regs[i] > 1)
495 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
497 /* Calculate registers of CLOBBERED_REGS register set
498 as call used registers from GENERAL_REGS register set. */
499 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
500 && call_used_regs[i])
501 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
504 /* If MMX is disabled, disable the registers. */
505 if (! TARGET_MMX)
506 accessible_reg_set &= ~reg_class_contents[MMX_REGS];
508 /* If SSE is disabled, disable the registers. */
509 if (! TARGET_SSE)
510 accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];
512 /* If the FPU is disabled, disable the registers. */
513 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
514 accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];
516 /* If AVX512F is disabled, disable the registers. */
517 if (! TARGET_AVX512F)
519 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
520 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
522 accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
526 /* Canonicalize a comparison from one we don't have to one we do have. */
528 static void
529 ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
530 bool op0_preserve_value)
532 /* The order of operands in the x87 ficom compare is forced by combine in
533 the simplify_comparison () function.  The FLOAT operator is treated as RTX_OBJ
534 with precedence over other operators and is always put in the first
535 place.  Swap the condition and operands to match the ficom instruction.  */
536 if (!op0_preserve_value
537 && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
539 enum rtx_code scode = swap_condition ((enum rtx_code) *code);
541 /* We are called only for compares that are split to the SAHF instruction.
542 Ensure that we have a setcc/jcc insn for the swapped condition.  */
543 if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
545 std::swap (*op0, *op1);
546 *code = (int) scode;
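/* Sketch of the transformation above (illustrative RTL only): combine produces
   something like

     (compare (float (mem ...)) (reg ...))     ;; code GE

   and, when the swapped condition is representable, it is rewritten as

     (compare (reg ...) (float (mem ...)))     ;; code LE (swapped)

   so the operand order matches the ficom patterns in i386.md.  */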
552 /* Hook to determine if one function can safely inline another. */
554 static bool
555 ix86_can_inline_p (tree caller, tree callee)
557 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
558 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
560 /* Changes to these flags can be tolerated for always_inline functions.
561 Let's hope the user knows what they are doing.  */
562 unsigned HOST_WIDE_INT always_inline_safe_mask
563 = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
564 | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
565 | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
566 | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
567 | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
568 | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
569 | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
572 if (!callee_tree)
573 callee_tree = target_option_default_node;
574 if (!caller_tree)
575 caller_tree = target_option_default_node;
576 if (callee_tree == caller_tree)
577 return true;
579 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
580 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
581 bool ret = false;
582 bool always_inline
583 = (DECL_DISREGARD_INLINE_LIMITS (callee)
584 && lookup_attribute ("always_inline",
585 DECL_ATTRIBUTES (callee)));
587 /* If callee only uses GPRs, ignore MASK_80387. */
588 if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
589 always_inline_safe_mask |= MASK_80387;
591 cgraph_node *callee_node = cgraph_node::get (callee);
592 /* The callee's ISA options should be a subset of the caller's, i.e. an SSE4
593 function can inline an SSE2 function but an SSE2 function can't inline
594 an SSE4 function.  */
595 if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
596 != callee_opts->x_ix86_isa_flags)
597 || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
598 != callee_opts->x_ix86_isa_flags2))
599 ret = false;
601 /* See if we have the same non-isa options. */
602 else if ((!always_inline
603 && caller_opts->x_target_flags != callee_opts->x_target_flags)
604 || (caller_opts->x_target_flags & ~always_inline_safe_mask)
605 != (callee_opts->x_target_flags & ~always_inline_safe_mask))
606 ret = false;
608 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
609 /* If the callee doesn't use FP expressions, differences in
610 ix86_fpmath can be ignored.  We are called from FEs
611 for multi-versioning call optimization, so beware of
612 ipa_fn_summaries not being available.  */
613 && (! ipa_fn_summaries
614 || ipa_fn_summaries->get (callee_node) == NULL
615 || ipa_fn_summaries->get (callee_node)->fp_expressions))
616 ret = false;
618 /* At this point we cannot identify whether the arch or tune setting
619 comes from a target attribute or not.  So the most conservative way
620 is to allow only a callee that uses the default arch and tune strings
621 to be inlined.  */
622 else if (!strcmp (callee_opts->x_ix86_arch_string, "x86-64")
623 && !strcmp (callee_opts->x_ix86_tune_string, "generic"))
624 ret = true;
626 /* See if arch, tune, etc. are the same.  As the previous ISA flag check
627 already verifies that the callee's ISA is a subset of the caller's, do not
628 block the always_inline attribute for the callee even if it has a different arch.  */
629 else if (!always_inline && caller_opts->arch != callee_opts->arch)
630 ret = false;
632 else if (!always_inline && caller_opts->tune != callee_opts->tune)
633 ret = false;
635 else if (!always_inline
636 && caller_opts->branch_cost != callee_opts->branch_cost)
637 ret = false;
639 else
640 ret = true;
642 return ret;
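/* Illustration of the ISA-subset rule above (hypothetical user code):

     __attribute__((target ("avx2")))   static int callee (int x) { return x + 1; }
     __attribute__((target ("sse4.2"))) int caller (int x) { return callee (x); }

   Here inlining is refused because the callee's ISA set (AVX2) is not a
   subset of the caller's (SSE4.2); the opposite direction, an AVX2 caller
   inlining an SSE4.2 callee, would be allowed.  */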
645 /* Return true if this goes in large data/bss. */
647 static bool
648 ix86_in_large_data_p (tree exp)
650 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
651 return false;
653 if (exp == NULL_TREE)
654 return false;
656 /* Functions are never large data. */
657 if (TREE_CODE (exp) == FUNCTION_DECL)
658 return false;
660 /* Automatic variables are never large data. */
661 if (VAR_P (exp) && !is_global_var (exp))
662 return false;
664 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
666 const char *section = DECL_SECTION_NAME (exp);
667 if (strcmp (section, ".ldata") == 0
668 || strcmp (section, ".lbss") == 0)
669 return true;
670 return false;
672 else
674 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
676 /* If this is an incomplete type with size 0, then we can't put it
677 in data because it might be too big when completed.  Also,
678 int_size_in_bytes returns -1 if the size can vary or is larger than
679 an integer, in which case it is also safer to assume that it goes in
680 large data.  */
681 if (size <= 0 || size > ix86_section_threshold)
682 return true;
685 return false;
688 /* i386-specific section flag to mark large sections. */
689 #define SECTION_LARGE SECTION_MACH_DEP
691 /* Switch to the appropriate section for output of DECL.
692 DECL is either a `VAR_DECL' node or a constant of some sort.
693 RELOC indicates whether forming the initial value of DECL requires
694 link-time relocations. */
696 ATTRIBUTE_UNUSED static section *
697 x86_64_elf_select_section (tree decl, int reloc,
698 unsigned HOST_WIDE_INT align)
700 if (ix86_in_large_data_p (decl))
702 const char *sname = NULL;
703 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
704 switch (categorize_decl_for_section (decl, reloc))
706 case SECCAT_DATA:
707 sname = ".ldata";
708 break;
709 case SECCAT_DATA_REL:
710 sname = ".ldata.rel";
711 break;
712 case SECCAT_DATA_REL_LOCAL:
713 sname = ".ldata.rel.local";
714 break;
715 case SECCAT_DATA_REL_RO:
716 sname = ".ldata.rel.ro";
717 break;
718 case SECCAT_DATA_REL_RO_LOCAL:
719 sname = ".ldata.rel.ro.local";
720 break;
721 case SECCAT_BSS:
722 sname = ".lbss";
723 flags |= SECTION_BSS;
724 break;
725 case SECCAT_RODATA:
726 case SECCAT_RODATA_MERGE_STR:
727 case SECCAT_RODATA_MERGE_STR_INIT:
728 case SECCAT_RODATA_MERGE_CONST:
729 sname = ".lrodata";
730 flags &= ~SECTION_WRITE;
731 break;
732 case SECCAT_SRODATA:
733 case SECCAT_SDATA:
734 case SECCAT_SBSS:
735 gcc_unreachable ();
736 case SECCAT_TEXT:
737 case SECCAT_TDATA:
738 case SECCAT_TBSS:
739 /* We don't split these for the medium model.  Place them into
740 default sections and hope for the best.  */
741 break;
743 if (sname)
745 /* We might get called with string constants, but get_named_section
746 doesn't like them as they are not DECLs. Also, we need to set
747 flags in that case. */
748 if (!DECL_P (decl))
749 return get_section (sname, flags, NULL);
750 return get_named_section (decl, sname, reloc);
753 return default_elf_select_section (decl, reloc, align);
756 /* Select a set of attributes for section NAME based on the properties
757 of DECL and whether or not RELOC indicates that DECL's initializer
758 might contain runtime relocations. */
760 static unsigned int ATTRIBUTE_UNUSED
761 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
763 unsigned int flags = default_section_type_flags (decl, name, reloc);
765 if (ix86_in_large_data_p (decl))
766 flags |= SECTION_LARGE;
768 if (decl == NULL_TREE
769 && (strcmp (name, ".ldata.rel.ro") == 0
770 || strcmp (name, ".ldata.rel.ro.local") == 0))
771 flags |= SECTION_RELRO;
773 if (strcmp (name, ".lbss") == 0
774 || startswith (name, ".lbss.")
775 || startswith (name, ".gnu.linkonce.lb."))
776 flags |= SECTION_BSS;
778 return flags;
781 /* Build up a unique section name, expressed as a
782 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
783 RELOC indicates whether the initial value of EXP requires
784 link-time relocations. */
786 static void ATTRIBUTE_UNUSED
787 x86_64_elf_unique_section (tree decl, int reloc)
789 if (ix86_in_large_data_p (decl))
791 const char *prefix = NULL;
792 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
793 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
795 switch (categorize_decl_for_section (decl, reloc))
797 case SECCAT_DATA:
798 case SECCAT_DATA_REL:
799 case SECCAT_DATA_REL_LOCAL:
800 case SECCAT_DATA_REL_RO:
801 case SECCAT_DATA_REL_RO_LOCAL:
802 prefix = one_only ? ".ld" : ".ldata";
803 break;
804 case SECCAT_BSS:
805 prefix = one_only ? ".lb" : ".lbss";
806 break;
807 case SECCAT_RODATA:
808 case SECCAT_RODATA_MERGE_STR:
809 case SECCAT_RODATA_MERGE_STR_INIT:
810 case SECCAT_RODATA_MERGE_CONST:
811 prefix = one_only ? ".lr" : ".lrodata";
812 break;
813 case SECCAT_SRODATA:
814 case SECCAT_SDATA:
815 case SECCAT_SBSS:
816 gcc_unreachable ();
817 case SECCAT_TEXT:
818 case SECCAT_TDATA:
819 case SECCAT_TBSS:
820 /* We don't split these for the medium model.  Place them into
821 default sections and hope for the best.  */
822 break;
824 if (prefix)
826 const char *name, *linkonce;
827 char *string;
829 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
830 name = targetm.strip_name_encoding (name);
832 /* If we're using one_only, then there needs to be a .gnu.linkonce
833 prefix to the section name. */
834 linkonce = one_only ? ".gnu.linkonce" : "";
836 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
838 set_decl_section_name (decl, string);
839 return;
842 default_unique_section (decl, reloc);
845 #ifdef COMMON_ASM_OP
847 #ifndef LARGECOMM_SECTION_ASM_OP
848 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
849 #endif
851 /* This says how to output assembler code to declare an
852 uninitialized external-linkage data object.
854 For medium-model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP directive for
855 large objects.  */
856 void
857 x86_elf_aligned_decl_common (FILE *file, tree decl,
858 const char *name, unsigned HOST_WIDE_INT size,
859 unsigned align)
861 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
862 && size > (unsigned int)ix86_section_threshold)
864 switch_to_section (get_named_section (decl, ".lbss", 0));
865 fputs (LARGECOMM_SECTION_ASM_OP, file);
867 else
868 fputs (COMMON_ASM_OP, file);
869 assemble_name (file, name);
870 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
871 size, align / BITS_PER_UNIT);
873 #endif
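/* Sketch of the effect of x86_elf_aligned_decl_common above: for a
   medium-model build with an object larger than ix86_section_threshold,
   say a hypothetical "char big[1 << 20];", the output switches to .lbss
   and uses the large-common directive, roughly

     .largecomm	big,1048576,32

   whereas a small object keeps the ordinary ".comm big,...,..." form.  */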
875 /* Utility function for targets to use in implementing
876 ASM_OUTPUT_ALIGNED_BSS. */
878 void
879 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
880 unsigned HOST_WIDE_INT size, unsigned align)
882 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
883 && size > (unsigned int)ix86_section_threshold)
884 switch_to_section (get_named_section (decl, ".lbss", 0));
885 else
886 switch_to_section (bss_section);
887 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
888 #ifdef ASM_DECLARE_OBJECT_NAME
889 last_assemble_variable_decl = decl;
890 ASM_DECLARE_OBJECT_NAME (file, name, decl);
891 #else
892 /* Standard thing is just output label for the object. */
893 ASM_OUTPUT_LABEL (file, name);
894 #endif /* ASM_DECLARE_OBJECT_NAME */
895 ASM_OUTPUT_SKIP (file, size ? size : 1);
898 /* Decide whether we must probe the stack before any space allocation
899 on this target. It's essentially TARGET_STACK_PROBE except when
900 -fstack-check causes the stack to be already probed differently. */
902 bool
903 ix86_target_stack_probe (void)
905 /* Do not probe the stack twice if static stack checking is enabled. */
906 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
907 return false;
909 return TARGET_STACK_PROBE;
912 /* Decide whether we can make a sibling call to a function. DECL is the
913 declaration of the function being targeted by the call and EXP is the
914 CALL_EXPR representing the call. */
916 static bool
917 ix86_function_ok_for_sibcall (tree decl, tree exp)
919 tree type, decl_or_type;
920 rtx a, b;
921 bool bind_global = decl && !targetm.binds_local_p (decl);
923 if (ix86_function_naked (current_function_decl))
924 return false;
926 /* Sibling call isn't OK if there are no caller-saved registers
927 since all registers must be preserved before return. */
928 if (cfun->machine->no_caller_saved_registers)
929 return false;
931 /* If we are generating position-independent code, we cannot sibcall
932 optimize direct calls to global functions, as the PLT requires
933 %ebx be live. (Darwin does not have a PLT.) */
934 if (!TARGET_MACHO
935 && !TARGET_64BIT
936 && flag_pic
937 && flag_plt
938 && bind_global)
939 return false;
941 /* If we need to align the outgoing stack, then sibcalling would
942 unalign the stack, which may break the called function. */
943 if (ix86_minimum_incoming_stack_boundary (true)
944 < PREFERRED_STACK_BOUNDARY)
945 return false;
947 if (decl)
949 decl_or_type = decl;
950 type = TREE_TYPE (decl);
952 else
954 /* We're looking at the CALL_EXPR, we need the type of the function. */
955 type = CALL_EXPR_FN (exp); /* pointer expression */
956 type = TREE_TYPE (type); /* pointer type */
957 type = TREE_TYPE (type); /* function type */
958 decl_or_type = type;
961 /* If outgoing reg parm stack space changes, we cannot do sibcall. */
962 if ((OUTGOING_REG_PARM_STACK_SPACE (type)
963 != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
964 || (REG_PARM_STACK_SPACE (decl_or_type)
965 != REG_PARM_STACK_SPACE (current_function_decl)))
967 maybe_complain_about_tail_call (exp,
968 "inconsistent size of stack space"
969 " allocated for arguments which are"
970 " passed in registers");
971 return false;
974 /* Check that the return value locations are the same. Like
975 if we are returning floats on the 80387 register stack, we cannot
976 make a sibcall from a function that doesn't return a float to a
977 function that does or, conversely, from a function that does return
978 a float to a function that doesn't; the necessary stack adjustment
979 would not be executed. This is also the place we notice
980 differences in the return value ABI. Note that it is ok for one
981 of the functions to have void return type as long as the return
982 value of the other is passed in a register. */
983 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
984 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
985 cfun->decl, false);
986 if (STACK_REG_P (a) || STACK_REG_P (b))
988 if (!rtx_equal_p (a, b))
989 return false;
991 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
993 else if (!rtx_equal_p (a, b))
994 return false;
996 if (TARGET_64BIT)
998 /* The SYSV ABI has more call-clobbered registers;
999 disallow sibcalls from MS to SYSV. */
1000 if (cfun->machine->call_abi == MS_ABI
1001 && ix86_function_type_abi (type) == SYSV_ABI)
1002 return false;
1004 else
1006 /* If this call is indirect, we'll need to be able to use a
1007 call-clobbered register for the address of the target function.
1008 Make sure that all such registers are not used for passing
1009 parameters. Note that DLLIMPORT functions and call to global
1010 function via GOT slot are indirect. */
1011 if (!decl
1012 || (bind_global && flag_pic && !flag_plt)
1013 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
1014 || flag_force_indirect_call)
1016 /* Check if regparm >= 3 since arg_reg_available is set to
1017 false if regparm == 0. If regparm is 1 or 2, there is
1018 always a call-clobbered register available.
1020 ??? The symbol indirect call doesn't need a call-clobbered
1021 register. But we don't know if this is a symbol indirect
1022 call or not here. */
1023 if (ix86_function_regparm (type, decl) >= 3
1024 && !cfun->machine->arg_reg_available)
1025 return false;
1029 if (decl && ix86_use_pseudo_pic_reg ())
1031 /* When PIC register is used, it must be restored after ifunc
1032 function returns. */
1033 cgraph_node *node = cgraph_node::get (decl);
1034 if (node && node->ifunc_resolver)
1035 return false;
1038 /* Disable sibcall if callee has indirect_return attribute and
1039 caller doesn't since callee will return to the caller's caller
1040 via an indirect jump. */
1041 if (((flag_cf_protection & (CF_RETURN | CF_BRANCH))
1042 == (CF_RETURN | CF_BRANCH))
1043 && lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (type))
1044 && !lookup_attribute ("indirect_return",
1045 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
1046 return false;
1048 /* Otherwise okay. That also includes certain types of indirect calls. */
1049 return true;
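/* A sibcall that survives the checks above is emitted as a jump that reuses
   the caller's frame; e.g. (hypothetical user code)

     int g (int);
     int f (int x) { return g (x + 1); }

   can compile on x86-64 to "addl $1, %edi" followed by "jmp g" instead of
   a call/ret pair.  */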
1052 /* This function determines from TYPE the calling-convention. */
1054 unsigned int
1055 ix86_get_callcvt (const_tree type)
1057 unsigned int ret = 0;
1058 bool is_stdarg;
1059 tree attrs;
1061 if (TARGET_64BIT)
1062 return IX86_CALLCVT_CDECL;
1064 attrs = TYPE_ATTRIBUTES (type);
1065 if (attrs != NULL_TREE)
1067 if (lookup_attribute ("cdecl", attrs))
1068 ret |= IX86_CALLCVT_CDECL;
1069 else if (lookup_attribute ("stdcall", attrs))
1070 ret |= IX86_CALLCVT_STDCALL;
1071 else if (lookup_attribute ("fastcall", attrs))
1072 ret |= IX86_CALLCVT_FASTCALL;
1073 else if (lookup_attribute ("thiscall", attrs))
1074 ret |= IX86_CALLCVT_THISCALL;
1076 /* Regparm isn't allowed for thiscall and fastcall.  */
1077 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
1079 if (lookup_attribute ("regparm", attrs))
1080 ret |= IX86_CALLCVT_REGPARM;
1081 if (lookup_attribute ("sseregparm", attrs))
1082 ret |= IX86_CALLCVT_SSEREGPARM;
1085 if (IX86_BASE_CALLCVT(ret) != 0)
1086 return ret;
1089 is_stdarg = stdarg_p (type);
1090 if (TARGET_RTD && !is_stdarg)
1091 return IX86_CALLCVT_STDCALL | ret;
1093 if (ret != 0
1094 || is_stdarg
1095 || TREE_CODE (type) != METHOD_TYPE
1096 || ix86_function_type_abi (type) != MS_ABI)
1097 return IX86_CALLCVT_CDECL | ret;
1099 return IX86_CALLCVT_THISCALL;
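/* Decoding examples for the 32-bit case above (hypothetical user code):

     int __attribute__((stdcall))  f (int, int);  -> IX86_CALLCVT_STDCALL
     int __attribute__((fastcall)) g (int, int);  -> IX86_CALLCVT_FASTCALL
     int                           h (int, ...);  -> IX86_CALLCVT_CDECL
                                                     (stdarg is always cdecl)

   With -mrtd (TARGET_RTD), a plain non-stdarg prototype defaults to
   stdcall instead of cdecl.  */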
1102 /* Return 0 if the attributes for two types are incompatible, 1 if they
1103 are compatible, and 2 if they are nearly compatible (which causes a
1104 warning to be generated). */
1106 static int
1107 ix86_comp_type_attributes (const_tree type1, const_tree type2)
1109 unsigned int ccvt1, ccvt2;
1111 if (TREE_CODE (type1) != FUNCTION_TYPE
1112 && TREE_CODE (type1) != METHOD_TYPE)
1113 return 1;
1115 ccvt1 = ix86_get_callcvt (type1);
1116 ccvt2 = ix86_get_callcvt (type2);
1117 if (ccvt1 != ccvt2)
1118 return 0;
1119 if (ix86_function_regparm (type1, NULL)
1120 != ix86_function_regparm (type2, NULL))
1121 return 0;
1123 return 1;
1126 /* Return the regparm value for a function with the indicated TYPE and DECL.
1127 DECL may be NULL when calling function indirectly
1128 or considering a libcall. */
1130 static int
1131 ix86_function_regparm (const_tree type, const_tree decl)
1133 tree attr;
1134 int regparm;
1135 unsigned int ccvt;
1137 if (TARGET_64BIT)
1138 return (ix86_function_type_abi (type) == SYSV_ABI
1139 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
1140 ccvt = ix86_get_callcvt (type);
1141 regparm = ix86_regparm;
1143 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
1145 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1146 if (attr)
1148 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1149 return regparm;
1152 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1153 return 2;
1154 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1155 return 1;
1157 /* Use register calling convention for local functions when possible. */
1158 if (decl
1159 && TREE_CODE (decl) == FUNCTION_DECL)
1161 cgraph_node *target = cgraph_node::get (decl);
1162 if (target)
1163 target = target->function_symbol ();
1165 /* Caller and callee must agree on the calling convention, so
1166 checking just 'optimize' here would mean that with
1167 __attribute__((optimize (...))) the caller could use the regparm convention
1168 and the callee not, or vice versa.  Instead look at whether the callee
1169 is optimized or not.  */
1170 if (target && opt_for_fn (target->decl, optimize)
1171 && !(profile_flag && !flag_fentry))
1173 if (target->local && target->can_change_signature)
1175 int local_regparm, globals = 0, regno;
1177 /* Make sure no regparm register is taken by a
1178 fixed register variable. */
1179 for (local_regparm = 0; local_regparm < REGPARM_MAX;
1180 local_regparm++)
1181 if (fixed_regs[local_regparm])
1182 break;
1184 /* We don't want to use regparm(3) for nested functions as
1185 these use a static chain pointer in the third argument. */
1186 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
1187 local_regparm = 2;
1189 /* Save a register for the split stack. */
1190 if (flag_split_stack)
1192 if (local_regparm == 3)
1193 local_regparm = 2;
1194 else if (local_regparm == 2
1195 && DECL_STATIC_CHAIN (target->decl))
1196 local_regparm = 1;
1199 /* Each fixed register usage increases register pressure,
1200 so fewer registers should be used for argument passing.
1201 This functionality can be overridden by an explicit
1202 regparm value.  */
1203 for (regno = AX_REG; regno <= DI_REG; regno++)
1204 if (fixed_regs[regno])
1205 globals++;
1207 local_regparm
1208 = globals < local_regparm ? local_regparm - globals : 0;
1210 if (local_regparm > regparm)
1211 regparm = local_regparm;
1216 return regparm;
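/* Illustration (hypothetical user code):

     __attribute__((regparm (3))) int f (int a, int b, int c);

   passes the three arguments in %eax, %edx and %ecx on ia32 instead of on
   the stack.  The local-function path above can apply the same convention
   automatically to static functions whose signature the compiler is free
   to change.  */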
1219 /* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) and
1220 DFmode (2) arguments in SSE registers for a function with the
1221 indicated TYPE and DECL.  DECL may be NULL when calling a function
1222 indirectly or considering a libcall.  Return -1 if any FP parameter
1223 should be rejected with an error.  This is used in situations where we imply the SSE
1224 calling convention but the function is called from another function with
1225 SSE disabled.  Otherwise return 0.  */
1227 static int
1228 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
1230 gcc_assert (!TARGET_64BIT);
1232 /* Use SSE registers to pass SFmode and DFmode arguments if requested
1233 by the sseregparm attribute. */
1234 if (TARGET_SSEREGPARM
1235 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
1237 if (!TARGET_SSE)
1239 if (warn)
1241 if (decl)
1242 error ("calling %qD with attribute sseregparm without "
1243 "SSE/SSE2 enabled", decl);
1244 else
1245 error ("calling %qT with attribute sseregparm without "
1246 "SSE/SSE2 enabled", type);
1248 return 0;
1251 return 2;
1254 if (!decl)
1255 return 0;
1257 cgraph_node *target = cgraph_node::get (decl);
1258 if (target)
1259 target = target->function_symbol ();
1261 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1262 (and DFmode for SSE2) arguments in SSE registers. */
1263 if (target
1264 /* TARGET_SSE_MATH */
1265 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
1266 && opt_for_fn (target->decl, optimize)
1267 && !(profile_flag && !flag_fentry))
1269 if (target->local && target->can_change_signature)
1271 /* Refuse to produce wrong code when a local function with SSE enabled
1272 is called from an SSE-disabled function.
1273 FIXME: We need a way to detect these cases across ltrans partitions
1274 and avoid using SSE calling conventions on local functions called
1275 from functions with SSE disabled.  For now at least delay the
1276 warning until we know we are going to produce wrong code.
1277 See PR66047.  */
1278 if (!TARGET_SSE && warn)
1279 return -1;
1280 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
1281 ->x_ix86_isa_flags) ? 2 : 1;
1285 return 0;
1288 /* Return true if EAX is live at the start of the function. Used by
1289 ix86_expand_prologue to determine if we need special help before
1290 calling allocate_stack_worker. */
1292 static bool
1293 ix86_eax_live_at_start_p (void)
1295 /* Cheat. Don't bother working forward from ix86_function_regparm
1296 to the function type to whether an actual argument is located in
1297 eax. Instead just look at cfg info, which is still close enough
1298 to correct at this point. This gives false positives for broken
1299 functions that might use uninitialized data that happens to be
1300 allocated in eax, but who cares? */
1301 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
1304 static bool
1305 ix86_keep_aggregate_return_pointer (tree fntype)
1307 tree attr;
1309 if (!TARGET_64BIT)
1311 attr = lookup_attribute ("callee_pop_aggregate_return",
1312 TYPE_ATTRIBUTES (fntype));
1313 if (attr)
1314 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
1316 /* For 32-bit MS-ABI the default is to keep aggregate
1317 return pointer. */
1318 if (ix86_function_type_abi (fntype) == MS_ABI)
1319 return true;
1321 return KEEP_AGGREGATE_RETURN_POINTER != 0;
1324 /* Value is the number of bytes of arguments automatically
1325 popped when returning from a subroutine call.
1326 FUNDECL is the declaration node of the function (as a tree),
1327 FUNTYPE is the data type of the function (as a tree),
1328 or for a library call it is an identifier node for the subroutine name.
1329 SIZE is the number of bytes of arguments passed on the stack.
1331 On the 80386, the RTD insn may be used to pop them if the number
1332 of args is fixed, but if the number is variable then the caller
1333 must pop them all. RTD can't be used for library calls now
1334 because the library is compiled with the Unix compiler.
1335 Use of RTD is a selectable option, since it is incompatible with
1336 standard Unix calling sequences. If the option is not selected,
1337 the caller must always pop the args.
1339 The attribute stdcall is equivalent to RTD on a per module basis. */
1341 static poly_int64
1342 ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
1344 unsigned int ccvt;
1346 /* None of the 64-bit ABIs pop arguments. */
1347 if (TARGET_64BIT)
1348 return 0;
1350 ccvt = ix86_get_callcvt (funtype);
1352 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
1353 | IX86_CALLCVT_THISCALL)) != 0
1354 && ! stdarg_p (funtype))
1355 return size;
1357 /* Lose any fake structure return argument if it is passed on the stack. */
1358 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1359 && !ix86_keep_aggregate_return_pointer (funtype))
1361 int nregs = ix86_function_regparm (funtype, fundecl);
1362 if (nregs == 0)
1363 return GET_MODE_SIZE (Pmode);
1366 return 0;
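/* Example (hypothetical user code): a 32-bit stdcall function

     void __attribute__((stdcall)) f (int a, int b);

   returns with "ret $8", i.e. the callee pops its 8 bytes of stack
   arguments, so the hook above returns SIZE for it.  A stdarg function
   gets 0 because only the caller knows how many bytes were pushed.  */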
1369 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1371 static bool
1372 ix86_legitimate_combined_insn (rtx_insn *insn)
1374 int i;
1376 /* Check operand constraints in case hard registers were propagated
1377 into insn pattern. This check prevents combine pass from
1378 generating insn patterns with invalid hard register operands.
1379 These invalid insns can eventually confuse reload to error out
1380 with a spill failure. See also PRs 46829 and 46843. */
1382 gcc_assert (INSN_CODE (insn) >= 0);
1384 extract_insn (insn);
1385 preprocess_constraints (insn);
1387 int n_operands = recog_data.n_operands;
1388 int n_alternatives = recog_data.n_alternatives;
1389 for (i = 0; i < n_operands; i++)
1391 rtx op = recog_data.operand[i];
1392 machine_mode mode = GET_MODE (op);
1393 const operand_alternative *op_alt;
1394 int offset = 0;
1395 bool win;
1396 int j;
1398 /* A unary operator may be accepted by the predicate, but it
1399 is irrelevant for matching constraints. */
1400 if (UNARY_P (op))
1401 op = XEXP (op, 0);
1403 if (SUBREG_P (op))
1405 if (REG_P (SUBREG_REG (op))
1406 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
1407 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
1408 GET_MODE (SUBREG_REG (op)),
1409 SUBREG_BYTE (op),
1410 GET_MODE (op));
1411 op = SUBREG_REG (op);
1414 if (!(REG_P (op) && HARD_REGISTER_P (op)))
1415 continue;
1417 op_alt = recog_op_alt;
1419 /* Operand has no constraints, anything is OK. */
1420 win = !n_alternatives;
1422 alternative_mask preferred = get_preferred_alternatives (insn);
1423 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
1425 if (!TEST_BIT (preferred, j))
1426 continue;
1427 if (op_alt[i].anything_ok
1428 || (op_alt[i].matches != -1
1429 && operands_match_p
1430 (recog_data.operand[i],
1431 recog_data.operand[op_alt[i].matches]))
1432 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
1434 win = true;
1435 break;
1439 if (!win)
1440 return false;
1443 return true;
1446 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1448 static unsigned HOST_WIDE_INT
1449 ix86_asan_shadow_offset (void)
1451 return SUBTARGET_SHADOW_OFFSET;
1454 /* Argument support functions. */
1456 /* Return true when register may be used to pass function parameters. */
1457 bool
1458 ix86_function_arg_regno_p (int regno)
1460 int i;
1461 enum calling_abi call_abi;
1462 const int *parm_regs;
1464 if (TARGET_SSE && SSE_REGNO_P (regno)
1465 && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
1466 return true;
1468 if (!TARGET_64BIT)
1469 return (regno < REGPARM_MAX
1470 || (TARGET_MMX && MMX_REGNO_P (regno)
1471 && regno < FIRST_MMX_REG + MMX_REGPARM_MAX));
1473 /* TODO: The function should depend on current function ABI but
1474 builtins.cc would need updating then. Therefore we use the
1475 default ABI. */
1476 call_abi = ix86_cfun_abi ();
1478 /* RAX is used as hidden argument to va_arg functions. */
1479 if (call_abi == SYSV_ABI && regno == AX_REG)
1480 return true;
1482 if (call_abi == MS_ABI)
1483 parm_regs = x86_64_ms_abi_int_parameter_registers;
1484 else
1485 parm_regs = x86_64_int_parameter_registers;
1487 for (i = 0; i < (call_abi == MS_ABI
1488 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
1489 if (regno == parm_regs[i])
1490 return true;
1491 return false;
1494 /* Return true if we do not know how to pass ARG solely in registers.  */
1496 static bool
1497 ix86_must_pass_in_stack (const function_arg_info &arg)
1499 if (must_pass_in_stack_var_size_or_pad (arg))
1500 return true;
1502 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1503 The layout_type routine is crafty and tries to trick us into passing
1504 currently unsupported vector types on the stack by using TImode. */
1505 return (!TARGET_64BIT && arg.mode == TImode
1506 && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
1509 /* Return the size, in bytes, of the area reserved for arguments passed
1510 in registers for the function represented by FNDECL, depending on the
1511 ABI format used.  */
1512 int
1513 ix86_reg_parm_stack_space (const_tree fndecl)
1515 enum calling_abi call_abi = SYSV_ABI;
1516 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1517 call_abi = ix86_function_abi (fndecl);
1518 else
1519 call_abi = ix86_function_type_abi (fndecl);
1520 if (TARGET_64BIT && call_abi == MS_ABI)
1521 return 32;
1522 return 0;
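/* The 32 bytes returned above for the 64-bit MS ABI are the "shadow space"
   (home area) that every caller must reserve just above the return address
   for the four register parameters; the SysV ABI reserves no such area,
   hence 0.  */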
1525 /* We add this as a workaround in order to use libc_has_function
1526 hook in i386.md. */
1527 bool
1528 ix86_libc_has_function (enum function_class fn_class)
1530 return targetm.libc_has_function (fn_class, NULL_TREE);
1533 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE,
1534 specifying the call ABI used.  */
1535 enum calling_abi
1536 ix86_function_type_abi (const_tree fntype)
1538 enum calling_abi abi = ix86_abi;
1540 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
1541 return abi;
1543 if (abi == SYSV_ABI
1544 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
1546 static int warned;
1547 if (TARGET_X32 && !warned)
1549 error ("X32 does not support %<ms_abi%> attribute");
1550 warned = 1;
1553 abi = MS_ABI;
1555 else if (abi == MS_ABI
1556 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
1557 abi = SYSV_ABI;
1559 return abi;
1562 enum calling_abi
1563 ix86_function_abi (const_tree fndecl)
1565 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
1568 /* Return SYSV_ABI or MS_ABI, depending on cfun,
1569 specifying the call ABI used.  */
1570 enum calling_abi
1571 ix86_cfun_abi (void)
1573 return cfun ? cfun->machine->call_abi : ix86_abi;
1576 bool
1577 ix86_function_ms_hook_prologue (const_tree fn)
1579 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
1581 if (decl_function_context (fn) != NULL_TREE)
1582 error_at (DECL_SOURCE_LOCATION (fn),
1583 "%<ms_hook_prologue%> attribute is not compatible "
1584 "with nested function");
1585 else
1586 return true;
1588 return false;
1591 bool
1592 ix86_function_naked (const_tree fn)
1594 if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
1595 return true;
1597 return false;
1600 /* Write the extra assembler code needed to declare a function properly. */
1602 void
1603 ix86_asm_output_function_label (FILE *out_file, const char *fname,
1604 tree decl)
1606 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
1608 if (cfun)
1609 cfun->machine->function_label_emitted = true;
1611 if (is_ms_hook)
1613 int i, filler_count = (TARGET_64BIT ? 32 : 16);
1614 unsigned int filler_cc = 0xcccccccc;
1616 for (i = 0; i < filler_count; i += 4)
1617 fprintf (out_file, ASM_LONG " %#x\n", filler_cc);
1620 #ifdef SUBTARGET_ASM_UNWIND_INIT
1621 SUBTARGET_ASM_UNWIND_INIT (out_file);
1622 #endif
1624 ASM_OUTPUT_LABEL (out_file, fname);
1626 /* Output magic byte marker, if hot-patch attribute is set. */
1627 if (is_ms_hook)
1629 if (TARGET_64BIT)
1631 /* leaq [%rsp + 0], %rsp */
1632 fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
1633 out_file);
1635 else
1637 /* movl.s %edi, %edi
1638 push %ebp
1639 movl.s %esp, %ebp */
1640 fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", out_file);
1645 /* Implementation of the call-ABI-switching target hook.  The call
1646 register sets specific to FNDECL are selected here.  See also
1647 ix86_conditional_register_usage for more details.  */
1648 void
1649 ix86_call_abi_override (const_tree fndecl)
1651 cfun->machine->call_abi = ix86_function_abi (fndecl);
1654 /* Return true if a pseudo register should be created and used to hold
1655 the GOT address for PIC code.  */
1656 bool
1657 ix86_use_pseudo_pic_reg (void)
1659 if ((TARGET_64BIT
1660 && (ix86_cmodel == CM_SMALL_PIC
1661 || TARGET_PECOFF))
1662 || !flag_pic)
1663 return false;
1664 return true;
1667 /* Initialize large model PIC register. */
1669 static void
1670 ix86_init_large_pic_reg (unsigned int tmp_regno)
1672 rtx_code_label *label;
1673 rtx tmp_reg;
1675 gcc_assert (Pmode == DImode);
1676 label = gen_label_rtx ();
1677 emit_label (label);
1678 LABEL_PRESERVE_P (label) = 1;
1679 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
1680 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
1681 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
1682 label));
1683 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
1684 emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
1685 const char *name = LABEL_NAME (label);
1686 PUT_CODE (label, NOTE);
1687 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
1688 NOTE_DELETED_LABEL_NAME (label) = name;
1691 /* Create and initialize PIC register if required. */
1692 static void
1693 ix86_init_pic_reg (void)
1695 edge entry_edge;
1696 rtx_insn *seq;
1698 if (!ix86_use_pseudo_pic_reg ())
1699 return;
1701 start_sequence ();
1703 if (TARGET_64BIT)
1705 if (ix86_cmodel == CM_LARGE_PIC)
1706 ix86_init_large_pic_reg (R11_REG);
1707 else
1708 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
1710 else
1712 /* If there is a future mcount call in the function, it is more profitable
1713 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
1714 rtx reg = crtl->profile
1715 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
1716 : pic_offset_table_rtx;
1717 rtx_insn *insn = emit_insn (gen_set_got (reg));
1718 RTX_FRAME_RELATED_P (insn) = 1;
1719 if (crtl->profile)
1720 emit_move_insn (pic_offset_table_rtx, reg);
1721 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
1724 seq = get_insns ();
1725 end_sequence ();
1727 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1728 insert_insn_on_edge (seq, entry_edge);
1729 commit_one_edge_insertion (entry_edge);
1732 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1733 for a call to a function whose data type is FNTYPE.
1734 For a library call, FNTYPE is 0. */
1736 void
1737 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1738 tree fntype, /* tree ptr for function decl */
1739 rtx libname, /* SYMBOL_REF of library name or 0 */
1740 tree fndecl,
1741 int caller)
1743 struct cgraph_node *local_info_node = NULL;
1744 struct cgraph_node *target = NULL;
1746 /* Set silent_p to false to raise an error for invalid calls when
1747 expanding function body. */
1748 cfun->machine->silent_p = false;
1750 memset (cum, 0, sizeof (*cum));
1752 if (fndecl)
1754 target = cgraph_node::get (fndecl);
1755 if (target)
1757 target = target->function_symbol ();
1758 local_info_node = cgraph_node::local_info_node (target->decl);
1759 cum->call_abi = ix86_function_abi (target->decl);
1761 else
1762 cum->call_abi = ix86_function_abi (fndecl);
1764 else
1765 cum->call_abi = ix86_function_type_abi (fntype);
1767 cum->caller = caller;
1769 /* Set up the number of registers to use for passing arguments. */
1770 cum->nregs = ix86_regparm;
1771 if (TARGET_64BIT)
1773 cum->nregs = (cum->call_abi == SYSV_ABI
1774 ? X86_64_REGPARM_MAX
1775 : X86_64_MS_REGPARM_MAX);
1777 if (TARGET_SSE)
1779 cum->sse_nregs = SSE_REGPARM_MAX;
1780 if (TARGET_64BIT)
1782 cum->sse_nregs = (cum->call_abi == SYSV_ABI
1783 ? X86_64_SSE_REGPARM_MAX
1784 : X86_64_MS_SSE_REGPARM_MAX);
1787 if (TARGET_MMX)
1788 cum->mmx_nregs = MMX_REGPARM_MAX;
1789 cum->warn_avx512f = true;
1790 cum->warn_avx = true;
1791 cum->warn_sse = true;
1792 cum->warn_mmx = true;
1794 /* Because the type might mismatch between caller and callee, we need to
1795 use the actual type of the function for local calls.
1796 FIXME: cgraph_analyze can be told to actually record if a function uses
1797 va_start, so for local functions maybe_vaarg can be made more aggressive,
1798 helping K&R code.
1799 FIXME: once the type system is fixed, we won't need this code anymore.  */
1800 if (local_info_node && local_info_node->local
1801 && local_info_node->can_change_signature)
1802 fntype = TREE_TYPE (target->decl);
1803 cum->stdarg = stdarg_p (fntype);
1804 cum->maybe_vaarg = (fntype
1805 ? (!prototype_p (fntype) || stdarg_p (fntype))
1806 : !libname);
1808 cum->decl = fndecl;
1810 cum->warn_empty = !warn_abi || cum->stdarg;
1811 if (!cum->warn_empty && fntype)
1813 function_args_iterator iter;
1814 tree argtype;
1815 bool seen_empty_type = false;
1816 FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
1818 if (argtype == error_mark_node || VOID_TYPE_P (argtype))
1819 break;
1820 if (TYPE_EMPTY_P (argtype))
1821 seen_empty_type = true;
1822 else if (seen_empty_type)
1824 cum->warn_empty = true;
1825 break;
1830 if (!TARGET_64BIT)
1832 /* If there are variable arguments, then we won't pass anything
1833 in registers in 32-bit mode. */
1834 if (stdarg_p (fntype))
1836 cum->nregs = 0;
1837 /* Since in 32-bit mode variable arguments are always passed on
1838 the stack, there is a scratch register available for an indirect
1839 sibcall. */
1840 cfun->machine->arg_reg_available = true;
1841 cum->sse_nregs = 0;
1842 cum->mmx_nregs = 0;
1843 cum->warn_avx512f = false;
1844 cum->warn_avx = false;
1845 cum->warn_sse = false;
1846 cum->warn_mmx = false;
1847 return;
1850 /* Use ecx and edx registers if function has fastcall attribute,
1851 else look for regparm information. */
1852 if (fntype)
1854 unsigned int ccvt = ix86_get_callcvt (fntype);
1855 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1857 cum->nregs = 1;
1858 cum->fastcall = 1; /* Same first register as in fastcall. */
1860 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1862 cum->nregs = 2;
1863 cum->fastcall = 1;
1865 else
1866 cum->nregs = ix86_function_regparm (fntype, fndecl);
1869 /* Set up the number of SSE registers used for passing SFmode
1870 and DFmode arguments. Warn for mismatching ABI. */
1871 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
1874 cfun->machine->arg_reg_available = (cum->nregs > 0);
1877 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
1878 But in the case of vector types, it is some vector mode.
1880 When we have only some of our vector isa extensions enabled, then there
1881 are some modes for which vector_mode_supported_p is false. For these
1882 modes, the generic vector support in gcc will choose some non-vector mode
1883 in order to implement the type. By computing the natural mode, we'll
1884 select the proper ABI location for the operand and not depend on whatever
1885 the middle-end decides to do with these vector types.
1887 The middle-end can't deal with vector types > 16 bytes. In this
1888 case, we return the original mode and warn about the ABI change if CUM
1889 isn't NULL.
1891 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
1892 available for the function return value. */
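/* For example, with a hypothetical
       typedef int v4si __attribute__ ((vector_size (16)));
   compiled without SSE, TYPE_MODE is some non-vector mode chosen by the
   middle-end, but we still compute V4SImode here so the argument keeps its
   psABI location, and -Wpsabi warns that the ABI differs from an
   SSE-enabled compilation.  */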
1894 static machine_mode
1895 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
1896 bool in_return)
1898 machine_mode mode = TYPE_MODE (type);
1900 if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode))
1902 HOST_WIDE_INT size = int_size_in_bytes (type);
1903 if ((size == 8 || size == 16 || size == 32 || size == 64)
1904 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
1905 && TYPE_VECTOR_SUBPARTS (type) > 1)
1907 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
1909 /* There are no XFmode vector modes ... */
1910 if (innermode == XFmode)
1911 return mode;
1913 /* ... and no decimal float vector modes. */
1914 if (DECIMAL_FLOAT_MODE_P (innermode))
1915 return mode;
1917 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type)))
1918 mode = MIN_MODE_VECTOR_FLOAT;
1919 else
1920 mode = MIN_MODE_VECTOR_INT;
1922 /* Get the mode which has this inner mode and number of units. */
1923 FOR_EACH_MODE_FROM (mode, mode)
1924 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
1925 && GET_MODE_INNER (mode) == innermode)
1927 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
1929 static bool warnedavx512f;
1930 static bool warnedavx512f_ret;
1932 if (cum && cum->warn_avx512f && !warnedavx512f)
1934 if (warning (OPT_Wpsabi, "AVX512F vector argument "
1935 "without AVX512F enabled changes the ABI"))
1936 warnedavx512f = true;
1938 else if (in_return && !warnedavx512f_ret)
1940 if (warning (OPT_Wpsabi, "AVX512F vector return "
1941 "without AVX512F enabled changes the ABI"))
1942 warnedavx512f_ret = true;
1945 return TYPE_MODE (type);
1947 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
1949 static bool warnedavx;
1950 static bool warnedavx_ret;
1952 if (cum && cum->warn_avx && !warnedavx)
1954 if (warning (OPT_Wpsabi, "AVX vector argument "
1955 "without AVX enabled changes the ABI"))
1956 warnedavx = true;
1958 else if (in_return && !warnedavx_ret)
1960 if (warning (OPT_Wpsabi, "AVX vector return "
1961 "without AVX enabled changes the ABI"))
1962 warnedavx_ret = true;
1965 return TYPE_MODE (type);
1967 else if (((size == 8 && TARGET_64BIT) || size == 16)
1968 && !TARGET_SSE
1969 && !TARGET_IAMCU)
1971 static bool warnedsse;
1972 static bool warnedsse_ret;
1974 if (cum && cum->warn_sse && !warnedsse)
1976 if (warning (OPT_Wpsabi, "SSE vector argument "
1977 "without SSE enabled changes the ABI"))
1978 warnedsse = true;
1980 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
1982 if (warning (OPT_Wpsabi, "SSE vector return "
1983 "without SSE enabled changes the ABI"))
1984 warnedsse_ret = true;
1987 else if ((size == 8 && !TARGET_64BIT)
1988 && (!cfun
1989 || cfun->machine->func_type == TYPE_NORMAL)
1990 && !TARGET_MMX
1991 && !TARGET_IAMCU)
1993 static bool warnedmmx;
1994 static bool warnedmmx_ret;
1996 if (cum && cum->warn_mmx && !warnedmmx)
1998 if (warning (OPT_Wpsabi, "MMX vector argument "
1999 "without MMX enabled changes the ABI"))
2000 warnedmmx = true;
2002 else if (in_return && !warnedmmx_ret)
2004 if (warning (OPT_Wpsabi, "MMX vector return "
2005 "without MMX enabled changes the ABI"))
2006 warnedmmx_ret = true;
2009 return mode;
2012 gcc_unreachable ();
2016 return mode;
2019 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2020 this may not agree with the mode that the type system has chosen for the
2021 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2022 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2024 static rtx
2025 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
2026 unsigned int regno)
2028 rtx tmp;
2030 if (orig_mode != BLKmode)
2031 tmp = gen_rtx_REG (orig_mode, regno);
2032 else
2034 tmp = gen_rtx_REG (mode, regno);
2035 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2036 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2039 return tmp;
2042 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
2043 of this code is to classify each 8-byte chunk of an incoming argument by register
2044 class and assign registers accordingly. */
2046 /* Return the union class of CLASS1 and CLASS2.
2047 See the x86-64 PS ABI for details. */
2049 static enum x86_64_reg_class
2050 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2052 /* Rule #1: If both classes are equal, this is the resulting class. */
2053 if (class1 == class2)
2054 return class1;
2056 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2057 the other class. */
2058 if (class1 == X86_64_NO_CLASS)
2059 return class2;
2060 if (class2 == X86_64_NO_CLASS)
2061 return class1;
2063 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2064 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2065 return X86_64_MEMORY_CLASS;
2067 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2068 if ((class1 == X86_64_INTEGERSI_CLASS
2069 && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS))
2070 || (class2 == X86_64_INTEGERSI_CLASS
2071 && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS)))
2072 return X86_64_INTEGERSI_CLASS;
2073 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2074 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2075 return X86_64_INTEGER_CLASS;
2077 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2078 MEMORY is used. */
2079 if (class1 == X86_64_X87_CLASS
2080 || class1 == X86_64_X87UP_CLASS
2081 || class1 == X86_64_COMPLEX_X87_CLASS
2082 || class2 == X86_64_X87_CLASS
2083 || class2 == X86_64_X87UP_CLASS
2084 || class2 == X86_64_COMPLEX_X87_CLASS)
2085 return X86_64_MEMORY_CLASS;
2087 /* Rule #6: Otherwise class SSE is used. */
2088 return X86_64_SSE_CLASS;
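/* For instance, a union such as
       union u { float f; int i; };
   occupies a single eightbyte whose members classify as
   X86_64_SSESF_CLASS and X86_64_INTEGERSI_CLASS; rule #4 merges them to
   X86_64_INTEGERSI_CLASS, so the union is passed in a general register.  */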
2091 /* Classify the argument of type TYPE and mode MODE.
2092 CLASSES will be filled by the register class used to pass each word
2093 of the operand. The number of words is returned. In case the parameter
2094 should be passed in memory, 0 is returned. As a special case for zero
2095 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2097 BIT_OFFSET is used internally for handling records; it specifies the
2098 offset in bits modulo 512, to avoid overflow cases.
2100 See the x86-64 PS ABI for details.
2103 static int
2104 classify_argument (machine_mode mode, const_tree type,
2105 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
2106 int &zero_width_bitfields)
2108 HOST_WIDE_INT bytes
2109 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2110 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
2112 /* Variable sized entities are always passed/returned in memory. */
2113 if (bytes < 0)
2114 return 0;
2116 if (mode != VOIDmode)
2118 /* The value of "named" doesn't matter. */
2119 function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
2120 if (targetm.calls.must_pass_in_stack (arg))
2121 return 0;
2124 if (type && (AGGREGATE_TYPE_P (type)
2125 || (TREE_CODE (type) == BITINT_TYPE && words > 1)))
2127 int i;
2128 tree field;
2129 enum x86_64_reg_class subclasses[MAX_CLASSES];
2131 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2132 if (bytes > 64)
2133 return 0;
2135 for (i = 0; i < words; i++)
2136 classes[i] = X86_64_NO_CLASS;
2138 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2139 signal the memory class, so handle this as a special case. */
2140 if (!words)
2142 classes[0] = X86_64_NO_CLASS;
2143 return 1;
2146 /* Classify each field of record and merge classes. */
2147 switch (TREE_CODE (type))
2149 case RECORD_TYPE:
2150 /* And now merge the fields of structure. */
2151 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2153 if (TREE_CODE (field) == FIELD_DECL)
2155 int num;
2157 if (TREE_TYPE (field) == error_mark_node)
2158 continue;
2160 /* Bitfields are always classified as integer. Handle them
2161 early, since later code would consider them to be
2162 misaligned integers. */
2163 if (DECL_BIT_FIELD (field))
2165 if (integer_zerop (DECL_SIZE (field)))
2167 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
2168 continue;
2169 if (zero_width_bitfields != 2)
2171 zero_width_bitfields = 1;
2172 continue;
2175 for (i = (int_bit_position (field)
2176 + (bit_offset % 64)) / 8 / 8;
2177 i < ((int_bit_position (field) + (bit_offset % 64))
2178 + tree_to_shwi (DECL_SIZE (field))
2179 + 63) / 8 / 8; i++)
2180 classes[i]
2181 = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
2183 else
2185 int pos;
2187 type = TREE_TYPE (field);
2189 /* Flexible array member is ignored. */
2190 if (TYPE_MODE (type) == BLKmode
2191 && TREE_CODE (type) == ARRAY_TYPE
2192 && TYPE_SIZE (type) == NULL_TREE
2193 && TYPE_DOMAIN (type) != NULL_TREE
2194 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
2195 == NULL_TREE))
2197 static bool warned;
2199 if (!warned && warn_psabi)
2201 warned = true;
2202 inform (input_location,
2203 "the ABI of passing struct with"
2204 " a flexible array member has"
2205 " changed in GCC 4.4");
2207 continue;
2209 num = classify_argument (TYPE_MODE (type), type,
2210 subclasses,
2211 (int_bit_position (field)
2212 + bit_offset) % 512,
2213 zero_width_bitfields);
2214 if (!num)
2215 return 0;
2216 pos = (int_bit_position (field)
2217 + (bit_offset % 64)) / 8 / 8;
2218 for (i = 0; i < num && (i + pos) < words; i++)
2219 classes[i + pos]
2220 = merge_classes (subclasses[i], classes[i + pos]);
2224 break;
2226 case ARRAY_TYPE:
2227 /* Arrays are handled as small records. */
2229 int num;
2230 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2231 TREE_TYPE (type), subclasses, bit_offset,
2232 zero_width_bitfields);
2233 if (!num)
2234 return 0;
2236 /* The partial classes are now full classes. */
2237 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2238 subclasses[0] = X86_64_SSE_CLASS;
2239 if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
2240 subclasses[0] = X86_64_SSE_CLASS;
2241 if (subclasses[0] == X86_64_INTEGERSI_CLASS
2242 && !((bit_offset % 64) == 0 && bytes == 4))
2243 subclasses[0] = X86_64_INTEGER_CLASS;
2245 for (i = 0; i < words; i++)
2246 classes[i] = subclasses[i % num];
2248 break;
2250 case UNION_TYPE:
2251 case QUAL_UNION_TYPE:
2252 /* Unions are similar to RECORD_TYPE but offset is always 0.
2254 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2256 if (TREE_CODE (field) == FIELD_DECL)
2258 int num;
2260 if (TREE_TYPE (field) == error_mark_node)
2261 continue;
2263 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2264 TREE_TYPE (field), subclasses,
2265 bit_offset, zero_width_bitfields);
2266 if (!num)
2267 return 0;
2268 for (i = 0; i < num && i < words; i++)
2269 classes[i] = merge_classes (subclasses[i], classes[i]);
2272 break;
2274 case BITINT_TYPE:
2275 /* _BitInt(N) for N > 64 is passed as structure containing
2276 (N + 63) / 64 64-bit elements. */
2277 if (words > 2)
2278 return 0;
2279 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2280 return 2;
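/* For example, an unsigned _BitInt(100) argument occupies two eightbytes,
   both X86_64_INTEGER_CLASS, and so travels in a pair of general
   registers, while any _BitInt wider than 128 bits (words > 2) is
   passed in memory.  */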
2282 default:
2283 gcc_unreachable ();
2286 if (words > 2)
2288 /* When the size is > 16 bytes, if the first class isn't
2289 X86_64_SSE_CLASS or any of the others isn't
2290 X86_64_SSEUP_CLASS, everything should be passed in
2291 memory. */
2292 if (classes[0] != X86_64_SSE_CLASS)
2293 return 0;
2295 for (i = 1; i < words; i++)
2296 if (classes[i] != X86_64_SSEUP_CLASS)
2297 return 0;
2300 /* Final merger cleanup. */
2301 for (i = 0; i < words; i++)
2303 /* If one class is MEMORY, everything should be passed in
2304 memory. */
2305 if (classes[i] == X86_64_MEMORY_CLASS)
2306 return 0;
2308 /* The X86_64_SSEUP_CLASS should always be preceded by
2309 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2310 if (classes[i] == X86_64_SSEUP_CLASS
2311 && classes[i - 1] != X86_64_SSE_CLASS
2312 && classes[i - 1] != X86_64_SSEUP_CLASS)
2314 /* The first one should never be X86_64_SSEUP_CLASS. */
2315 gcc_assert (i != 0);
2316 classes[i] = X86_64_SSE_CLASS;
2319 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2320 everything should be passed in memory. */
2321 if (classes[i] == X86_64_X87UP_CLASS
2322 && (classes[i - 1] != X86_64_X87_CLASS))
2324 static bool warned;
2326 /* The first one should never be X86_64_X87UP_CLASS. */
2327 gcc_assert (i != 0);
2328 if (!warned && warn_psabi)
2330 warned = true;
2331 inform (input_location,
2332 "the ABI of passing union with %<long double%>"
2333 " has changed in GCC 4.4");
2335 return 0;
2338 return words;
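/* As a concrete example of the merging above, a struct such as
       struct s { double d; int i; };
   classifies its first eightbyte as an SSE class (the double) and its
   second as an integer class (the int), so it is passed in one SSE and
   one integer register, whereas a struct containing a `long double'
   picks up X87 classes and is passed in memory instead.  */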
2341 /* Compute alignment needed. We align all types to natural boundaries with
2342 the exception of XFmode, which is aligned to 64 bits. */
2343 if (mode != VOIDmode && mode != BLKmode)
2345 int mode_alignment = GET_MODE_BITSIZE (mode);
2347 if (mode == XFmode)
2348 mode_alignment = 128;
2349 else if (mode == XCmode)
2350 mode_alignment = 256;
2351 if (COMPLEX_MODE_P (mode))
2352 mode_alignment /= 2;
2353 /* Misaligned fields are always returned in memory. */
2354 if (bit_offset % mode_alignment)
2355 return 0;
2358 /* for V1xx modes, just use the base mode */
2359 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
2360 && GET_MODE_UNIT_SIZE (mode) == bytes)
2361 mode = GET_MODE_INNER (mode);
2363 /* Classification of atomic types. */
2364 switch (mode)
2366 case E_SDmode:
2367 case E_DDmode:
2368 classes[0] = X86_64_SSE_CLASS;
2369 return 1;
2370 case E_TDmode:
2371 classes[0] = X86_64_SSE_CLASS;
2372 classes[1] = X86_64_SSEUP_CLASS;
2373 return 2;
2374 case E_DImode:
2375 case E_SImode:
2376 case E_HImode:
2377 case E_QImode:
2378 case E_CSImode:
2379 case E_CHImode:
2380 case E_CQImode:
2382 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
2384 /* Analyze last 128 bits only. */
2385 size = (size - 1) & 0x7f;
2387 if (size < 32)
2389 classes[0] = X86_64_INTEGERSI_CLASS;
2390 return 1;
2392 else if (size < 64)
2394 classes[0] = X86_64_INTEGER_CLASS;
2395 return 1;
2397 else if (size < 64+32)
2399 classes[0] = X86_64_INTEGER_CLASS;
2400 classes[1] = X86_64_INTEGERSI_CLASS;
2401 return 2;
2403 else if (size < 64+64)
2405 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2406 return 2;
2408 else
2409 gcc_unreachable ();
2411 case E_CDImode:
2412 case E_TImode:
2413 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2414 return 2;
2415 case E_COImode:
2416 case E_OImode:
2417 /* OImode shouldn't be used directly. */
2418 gcc_unreachable ();
2419 case E_CTImode:
2420 return 0;
2421 case E_HFmode:
2422 case E_BFmode:
2423 if (!(bit_offset % 64))
2424 classes[0] = X86_64_SSEHF_CLASS;
2425 else
2426 classes[0] = X86_64_SSE_CLASS;
2427 return 1;
2428 case E_SFmode:
2429 if (!(bit_offset % 64))
2430 classes[0] = X86_64_SSESF_CLASS;
2431 else
2432 classes[0] = X86_64_SSE_CLASS;
2433 return 1;
2434 case E_DFmode:
2435 classes[0] = X86_64_SSEDF_CLASS;
2436 return 1;
2437 case E_XFmode:
2438 classes[0] = X86_64_X87_CLASS;
2439 classes[1] = X86_64_X87UP_CLASS;
2440 return 2;
2441 case E_TFmode:
2442 classes[0] = X86_64_SSE_CLASS;
2443 classes[1] = X86_64_SSEUP_CLASS;
2444 return 2;
2445 case E_HCmode:
2446 case E_BCmode:
2447 classes[0] = X86_64_SSE_CLASS;
2448 if (!(bit_offset % 64))
2449 return 1;
2450 else
2452 classes[1] = X86_64_SSEHF_CLASS;
2453 return 2;
2455 case E_SCmode:
2456 classes[0] = X86_64_SSE_CLASS;
2457 if (!(bit_offset % 64))
2458 return 1;
2459 else
2461 static bool warned;
2463 if (!warned && warn_psabi)
2465 warned = true;
2466 inform (input_location,
2467 "the ABI of passing structure with %<complex float%>"
2468 " member has changed in GCC 4.4");
2470 classes[1] = X86_64_SSESF_CLASS;
2471 return 2;
2473 case E_DCmode:
2474 classes[0] = X86_64_SSEDF_CLASS;
2475 classes[1] = X86_64_SSEDF_CLASS;
2476 return 2;
2477 case E_XCmode:
2478 classes[0] = X86_64_COMPLEX_X87_CLASS;
2479 return 1;
2480 case E_TCmode:
2481 /* This mode is larger than 16 bytes. */
2482 return 0;
2483 case E_V8SFmode:
2484 case E_V8SImode:
2485 case E_V32QImode:
2486 case E_V16HFmode:
2487 case E_V16BFmode:
2488 case E_V16HImode:
2489 case E_V4DFmode:
2490 case E_V4DImode:
2491 classes[0] = X86_64_SSE_CLASS;
2492 classes[1] = X86_64_SSEUP_CLASS;
2493 classes[2] = X86_64_SSEUP_CLASS;
2494 classes[3] = X86_64_SSEUP_CLASS;
2495 return 4;
2496 case E_V8DFmode:
2497 case E_V16SFmode:
2498 case E_V32HFmode:
2499 case E_V32BFmode:
2500 case E_V8DImode:
2501 case E_V16SImode:
2502 case E_V32HImode:
2503 case E_V64QImode:
2504 classes[0] = X86_64_SSE_CLASS;
2505 classes[1] = X86_64_SSEUP_CLASS;
2506 classes[2] = X86_64_SSEUP_CLASS;
2507 classes[3] = X86_64_SSEUP_CLASS;
2508 classes[4] = X86_64_SSEUP_CLASS;
2509 classes[5] = X86_64_SSEUP_CLASS;
2510 classes[6] = X86_64_SSEUP_CLASS;
2511 classes[7] = X86_64_SSEUP_CLASS;
2512 return 8;
2513 case E_V4SFmode:
2514 case E_V4SImode:
2515 case E_V16QImode:
2516 case E_V8HImode:
2517 case E_V8HFmode:
2518 case E_V8BFmode:
2519 case E_V2DFmode:
2520 case E_V2DImode:
2521 classes[0] = X86_64_SSE_CLASS;
2522 classes[1] = X86_64_SSEUP_CLASS;
2523 return 2;
2524 case E_V1TImode:
2525 case E_V1DImode:
2526 case E_V2SFmode:
2527 case E_V2SImode:
2528 case E_V4HImode:
2529 case E_V4HFmode:
2530 case E_V4BFmode:
2531 case E_V2HFmode:
2532 case E_V2BFmode:
2533 case E_V8QImode:
2534 classes[0] = X86_64_SSE_CLASS;
2535 return 1;
2536 case E_BLKmode:
2537 case E_VOIDmode:
2538 return 0;
2539 default:
2540 gcc_assert (VECTOR_MODE_P (mode));
2542 if (bytes > 16)
2543 return 0;
2545 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2547 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2548 classes[0] = X86_64_INTEGERSI_CLASS;
2549 else
2550 classes[0] = X86_64_INTEGER_CLASS;
2551 classes[1] = X86_64_INTEGER_CLASS;
2552 return 1 + (bytes > 8);
2556 /* Wrapper around classify_argument with the extra zero_width_bitfields
2557 argument, to diagnose GCC 12.1 ABI differences for C. */
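/* For example, for a C struct such as
       struct s { float x; int : 0; float y; };
   GCC 12.1 ignores the zero-width bit-field and passes the struct in an
   SSE register, while earlier releases classified the bit-field as
   integer and used a general register; the diagnostic below points out
   that difference.  */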
2559 static int
2560 classify_argument (machine_mode mode, const_tree type,
2561 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2563 int zero_width_bitfields = 0;
2564 static bool warned = false;
2565 int n = classify_argument (mode, type, classes, bit_offset,
2566 zero_width_bitfields);
2567 if (!zero_width_bitfields || warned || !warn_psabi)
2568 return n;
2569 enum x86_64_reg_class alt_classes[MAX_CLASSES];
2570 zero_width_bitfields = 2;
2571 if (classify_argument (mode, type, alt_classes, bit_offset,
2572 zero_width_bitfields) != n)
2573 zero_width_bitfields = 3;
2574 else
2575 for (int i = 0; i < n; i++)
2576 if (classes[i] != alt_classes[i])
2578 zero_width_bitfields = 3;
2579 break;
2581 if (zero_width_bitfields == 3)
2583 warned = true;
2584 const char *url
2585 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
2587 inform (input_location,
2588 "the ABI of passing C structures with zero-width bit-fields"
2589 " has changed in GCC %{12.1%}", url);
2591 return n;
2594 /* Examine the argument and set the number of registers required in each
2595 class. Return true iff the parameter should be passed in memory. */
2597 static bool
2598 examine_argument (machine_mode mode, const_tree type, int in_return,
2599 int *int_nregs, int *sse_nregs)
2601 enum x86_64_reg_class regclass[MAX_CLASSES];
2602 int n = classify_argument (mode, type, regclass, 0);
2604 *int_nregs = 0;
2605 *sse_nregs = 0;
2607 if (!n)
2608 return true;
2609 for (n--; n >= 0; n--)
2610 switch (regclass[n])
2612 case X86_64_INTEGER_CLASS:
2613 case X86_64_INTEGERSI_CLASS:
2614 (*int_nregs)++;
2615 break;
2616 case X86_64_SSE_CLASS:
2617 case X86_64_SSEHF_CLASS:
2618 case X86_64_SSESF_CLASS:
2619 case X86_64_SSEDF_CLASS:
2620 (*sse_nregs)++;
2621 break;
2622 case X86_64_NO_CLASS:
2623 case X86_64_SSEUP_CLASS:
2624 break;
2625 case X86_64_X87_CLASS:
2626 case X86_64_X87UP_CLASS:
2627 case X86_64_COMPLEX_X87_CLASS:
2628 if (!in_return)
2629 return true;
2630 break;
2631 case X86_64_MEMORY_CLASS:
2632 gcc_unreachable ();
2635 return false;
2638 /* Construct container for the argument used by GCC interface. See
2639 FUNCTION_ARG for the detailed description. */
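/* For instance, for an argument classified as { X86_64_SSEDF_CLASS,
   X86_64_INTEGER_CLASS } this builds a two-element PARALLEL pairing a
   DFmode SSE register at byte offset 0 with a DImode integer register
   at byte offset 8.  */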
2641 static rtx
2642 construct_container (machine_mode mode, machine_mode orig_mode,
2643 const_tree type, int in_return, int nintregs, int nsseregs,
2644 const int *intreg, int sse_regno)
2646 /* The following variables hold the static issued_error state. */
2647 static bool issued_sse_arg_error;
2648 static bool issued_sse_ret_error;
2649 static bool issued_x87_ret_error;
2651 machine_mode tmpmode;
2652 int bytes
2653 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2654 enum x86_64_reg_class regclass[MAX_CLASSES];
2655 int n;
2656 int i;
2657 int nexps = 0;
2658 int needed_sseregs, needed_intregs;
2659 rtx exp[MAX_CLASSES];
2660 rtx ret;
2662 n = classify_argument (mode, type, regclass, 0);
2663 if (!n)
2664 return NULL;
2665 if (examine_argument (mode, type, in_return, &needed_intregs,
2666 &needed_sseregs))
2667 return NULL;
2668 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2669 return NULL;
2671 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2672 some less clueful developer tries to use floating-point anyway. */
2673 if (needed_sseregs
2674 && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
2676 /* Return early if we shouldn't raise an error for invalid
2677 calls. */
2678 if (cfun != NULL && cfun->machine->silent_p)
2679 return NULL;
2680 if (in_return)
2682 if (!issued_sse_ret_error)
2684 if (VALID_SSE2_TYPE_MODE (mode))
2685 error ("SSE register return with SSE2 disabled");
2686 else
2687 error ("SSE register return with SSE disabled");
2688 issued_sse_ret_error = true;
2691 else if (!issued_sse_arg_error)
2693 if (VALID_SSE2_TYPE_MODE (mode))
2694 error ("SSE register argument with SSE2 disabled");
2695 else
2696 error ("SSE register argument with SSE disabled");
2697 issued_sse_arg_error = true;
2699 return NULL;
2702 /* Likewise, error if the ABI requires us to return values in the
2703 x87 registers and the user specified -mno-80387. */
2704 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
2705 for (i = 0; i < n; i++)
2706 if (regclass[i] == X86_64_X87_CLASS
2707 || regclass[i] == X86_64_X87UP_CLASS
2708 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
2710 /* Return early if we shouldn't raise an error for invalid
2711 calls. */
2712 if (cfun != NULL && cfun->machine->silent_p)
2713 return NULL;
2714 if (!issued_x87_ret_error)
2716 error ("x87 register return with x87 disabled");
2717 issued_x87_ret_error = true;
2719 return NULL;
2722 /* First construct simple cases. Avoid SCmode, since we want to use
2723 single register to pass this type. */
2724 if (n == 1 && mode != SCmode && mode != HCmode)
2725 switch (regclass[0])
2727 case X86_64_INTEGER_CLASS:
2728 case X86_64_INTEGERSI_CLASS:
2729 return gen_rtx_REG (mode, intreg[0]);
2730 case X86_64_SSE_CLASS:
2731 case X86_64_SSEHF_CLASS:
2732 case X86_64_SSESF_CLASS:
2733 case X86_64_SSEDF_CLASS:
2734 if (mode != BLKmode)
2735 return gen_reg_or_parallel (mode, orig_mode,
2736 GET_SSE_REGNO (sse_regno));
2737 break;
2738 case X86_64_X87_CLASS:
2739 case X86_64_COMPLEX_X87_CLASS:
2740 return gen_rtx_REG (mode, FIRST_STACK_REG);
2741 case X86_64_NO_CLASS:
2742 /* Zero sized array, struct or class. */
2743 return NULL;
2744 default:
2745 gcc_unreachable ();
2747 if (n == 2
2748 && regclass[0] == X86_64_SSE_CLASS
2749 && regclass[1] == X86_64_SSEUP_CLASS
2750 && mode != BLKmode)
2751 return gen_reg_or_parallel (mode, orig_mode,
2752 GET_SSE_REGNO (sse_regno));
2753 if (n == 4
2754 && regclass[0] == X86_64_SSE_CLASS
2755 && regclass[1] == X86_64_SSEUP_CLASS
2756 && regclass[2] == X86_64_SSEUP_CLASS
2757 && regclass[3] == X86_64_SSEUP_CLASS
2758 && mode != BLKmode)
2759 return gen_reg_or_parallel (mode, orig_mode,
2760 GET_SSE_REGNO (sse_regno));
2761 if (n == 8
2762 && regclass[0] == X86_64_SSE_CLASS
2763 && regclass[1] == X86_64_SSEUP_CLASS
2764 && regclass[2] == X86_64_SSEUP_CLASS
2765 && regclass[3] == X86_64_SSEUP_CLASS
2766 && regclass[4] == X86_64_SSEUP_CLASS
2767 && regclass[5] == X86_64_SSEUP_CLASS
2768 && regclass[6] == X86_64_SSEUP_CLASS
2769 && regclass[7] == X86_64_SSEUP_CLASS
2770 && mode != BLKmode)
2771 return gen_reg_or_parallel (mode, orig_mode,
2772 GET_SSE_REGNO (sse_regno));
2773 if (n == 2
2774 && regclass[0] == X86_64_X87_CLASS
2775 && regclass[1] == X86_64_X87UP_CLASS)
2776 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2778 if (n == 2
2779 && regclass[0] == X86_64_INTEGER_CLASS
2780 && regclass[1] == X86_64_INTEGER_CLASS
2781 && (mode == CDImode || mode == TImode || mode == BLKmode)
2782 && intreg[0] + 1 == intreg[1])
2784 if (mode == BLKmode)
2786 /* Use TImode for BLKmode values in 2 integer registers. */
2787 exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
2788 gen_rtx_REG (TImode, intreg[0]),
2789 GEN_INT (0));
2790 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
2791 XVECEXP (ret, 0, 0) = exp[0];
2792 return ret;
2794 else
2795 return gen_rtx_REG (mode, intreg[0]);
2798 /* Otherwise figure out the entries of the PARALLEL. */
2799 for (i = 0; i < n; i++)
2801 int pos;
2803 switch (regclass[i])
2805 case X86_64_NO_CLASS:
2806 break;
2807 case X86_64_INTEGER_CLASS:
2808 case X86_64_INTEGERSI_CLASS:
2809 /* Merge TImodes on aligned occasions here too. */
2810 if (i * 8 + 8 > bytes)
2812 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
2813 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
2814 /* We've requested 24 bytes for which we
2815 don't have a mode. Use DImode. */
2816 tmpmode = DImode;
2818 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
2819 tmpmode = SImode;
2820 else
2821 tmpmode = DImode;
2822 exp [nexps++]
2823 = gen_rtx_EXPR_LIST (VOIDmode,
2824 gen_rtx_REG (tmpmode, *intreg),
2825 GEN_INT (i*8));
2826 intreg++;
2827 break;
2828 case X86_64_SSEHF_CLASS:
2829 tmpmode = (mode == BFmode ? BFmode : HFmode);
2830 exp [nexps++]
2831 = gen_rtx_EXPR_LIST (VOIDmode,
2832 gen_rtx_REG (tmpmode,
2833 GET_SSE_REGNO (sse_regno)),
2834 GEN_INT (i*8));
2835 sse_regno++;
2836 break;
2837 case X86_64_SSESF_CLASS:
2838 exp [nexps++]
2839 = gen_rtx_EXPR_LIST (VOIDmode,
2840 gen_rtx_REG (SFmode,
2841 GET_SSE_REGNO (sse_regno)),
2842 GEN_INT (i*8));
2843 sse_regno++;
2844 break;
2845 case X86_64_SSEDF_CLASS:
2846 exp [nexps++]
2847 = gen_rtx_EXPR_LIST (VOIDmode,
2848 gen_rtx_REG (DFmode,
2849 GET_SSE_REGNO (sse_regno)),
2850 GEN_INT (i*8));
2851 sse_regno++;
2852 break;
2853 case X86_64_SSE_CLASS:
2854 pos = i;
2855 switch (n)
2857 case 1:
2858 tmpmode = DImode;
2859 break;
2860 case 2:
2861 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
2863 tmpmode = TImode;
2864 i++;
2866 else
2867 tmpmode = DImode;
2868 break;
2869 case 4:
2870 gcc_assert (i == 0
2871 && regclass[1] == X86_64_SSEUP_CLASS
2872 && regclass[2] == X86_64_SSEUP_CLASS
2873 && regclass[3] == X86_64_SSEUP_CLASS);
2874 tmpmode = OImode;
2875 i += 3;
2876 break;
2877 case 8:
2878 gcc_assert (i == 0
2879 && regclass[1] == X86_64_SSEUP_CLASS
2880 && regclass[2] == X86_64_SSEUP_CLASS
2881 && regclass[3] == X86_64_SSEUP_CLASS
2882 && regclass[4] == X86_64_SSEUP_CLASS
2883 && regclass[5] == X86_64_SSEUP_CLASS
2884 && regclass[6] == X86_64_SSEUP_CLASS
2885 && regclass[7] == X86_64_SSEUP_CLASS);
2886 tmpmode = XImode;
2887 i += 7;
2888 break;
2889 default:
2890 gcc_unreachable ();
2892 exp [nexps++]
2893 = gen_rtx_EXPR_LIST (VOIDmode,
2894 gen_rtx_REG (tmpmode,
2895 GET_SSE_REGNO (sse_regno)),
2896 GEN_INT (pos*8));
2897 sse_regno++;
2898 break;
2899 default:
2900 gcc_unreachable ();
2904 /* Empty aligned struct, union or class. */
2905 if (nexps == 0)
2906 return NULL;
2908 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2909 for (i = 0; i < nexps; i++)
2910 XVECEXP (ret, 0, i) = exp [i];
2911 return ret;
2914 /* Update the data in CUM to advance over an argument of mode MODE
2915 and data type TYPE. (TYPE is null for libcalls where that information
2916 may not be available.)
2918 Return the number of integer registers advanced over. */
2920 static int
2921 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2922 const_tree type, HOST_WIDE_INT bytes,
2923 HOST_WIDE_INT words)
2925 int res = 0;
2926 bool error_p = false;
2928 if (TARGET_IAMCU)
2930 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2931 bytes in registers. */
2932 if (!VECTOR_MODE_P (mode) && bytes <= 8)
2933 goto pass_in_reg;
2934 return res;
2937 switch (mode)
2939 default:
2940 break;
2942 case E_BLKmode:
2943 if (bytes < 0)
2944 break;
2945 /* FALLTHRU */
2947 case E_DImode:
2948 case E_SImode:
2949 case E_HImode:
2950 case E_QImode:
2951 pass_in_reg:
2952 cum->words += words;
2953 cum->nregs -= words;
2954 cum->regno += words;
2955 if (cum->nregs >= 0)
2956 res = words;
2957 if (cum->nregs <= 0)
2959 cum->nregs = 0;
2960 cfun->machine->arg_reg_available = false;
2961 cum->regno = 0;
2963 break;
2965 case E_OImode:
2966 /* OImode shouldn't be used directly. */
2967 gcc_unreachable ();
2969 case E_DFmode:
2970 if (cum->float_in_sse == -1)
2971 error_p = true;
2972 if (cum->float_in_sse < 2)
2973 break;
2974 /* FALLTHRU */
2975 case E_SFmode:
2976 if (cum->float_in_sse == -1)
2977 error_p = true;
2978 if (cum->float_in_sse < 1)
2979 break;
2980 /* FALLTHRU */
2982 case E_V16HFmode:
2983 case E_V16BFmode:
2984 case E_V8SFmode:
2985 case E_V8SImode:
2986 case E_V64QImode:
2987 case E_V32HImode:
2988 case E_V16SImode:
2989 case E_V8DImode:
2990 case E_V32HFmode:
2991 case E_V32BFmode:
2992 case E_V16SFmode:
2993 case E_V8DFmode:
2994 case E_V32QImode:
2995 case E_V16HImode:
2996 case E_V4DFmode:
2997 case E_V4DImode:
2998 case E_TImode:
2999 case E_V16QImode:
3000 case E_V8HImode:
3001 case E_V4SImode:
3002 case E_V2DImode:
3003 case E_V8HFmode:
3004 case E_V8BFmode:
3005 case E_V4SFmode:
3006 case E_V2DFmode:
3007 if (!type || !AGGREGATE_TYPE_P (type))
3009 cum->sse_words += words;
3010 cum->sse_nregs -= 1;
3011 cum->sse_regno += 1;
3012 if (cum->sse_nregs <= 0)
3014 cum->sse_nregs = 0;
3015 cum->sse_regno = 0;
3018 break;
3020 case E_V8QImode:
3021 case E_V4HImode:
3022 case E_V4HFmode:
3023 case E_V4BFmode:
3024 case E_V2SImode:
3025 case E_V2SFmode:
3026 case E_V1TImode:
3027 case E_V1DImode:
3028 if (!type || !AGGREGATE_TYPE_P (type))
3030 cum->mmx_words += words;
3031 cum->mmx_nregs -= 1;
3032 cum->mmx_regno += 1;
3033 if (cum->mmx_nregs <= 0)
3035 cum->mmx_nregs = 0;
3036 cum->mmx_regno = 0;
3039 break;
3041 if (error_p)
3043 cum->float_in_sse = 0;
3044 error ("calling %qD with SSE calling convention without "
3045 "SSE/SSE2 enabled", cum->decl);
3046 sorry ("this is a GCC bug that can be worked around by adding "
3047 "attribute used to function called");
3050 return res;
3053 static int
3054 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
3055 const_tree type, HOST_WIDE_INT words, bool named)
3057 int int_nregs, sse_nregs;
3059 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
3060 if (!named && (VALID_AVX512F_REG_MODE (mode)
3061 || VALID_AVX256_REG_MODE (mode)))
3062 return 0;
3064 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
3065 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3067 cum->nregs -= int_nregs;
3068 cum->sse_nregs -= sse_nregs;
3069 cum->regno += int_nregs;
3070 cum->sse_regno += sse_nregs;
3071 return int_nregs;
3073 else
3075 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
3076 cum->words = ROUND_UP (cum->words, align);
3077 cum->words += words;
3078 return 0;
3082 static int
3083 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3084 HOST_WIDE_INT words)
3086 /* Otherwise, this should be passed indirectly. */
3087 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3089 cum->words += words;
3090 if (cum->nregs > 0)
3092 cum->nregs -= 1;
3093 cum->regno += 1;
3094 return 1;
3096 return 0;
3099 /* Update the data in CUM to advance over argument ARG. */
3101 static void
3102 ix86_function_arg_advance (cumulative_args_t cum_v,
3103 const function_arg_info &arg)
3105 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3106 machine_mode mode = arg.mode;
3107 HOST_WIDE_INT bytes, words;
3108 int nregs;
3110 /* The argument of an interrupt handler is a special case and is
3111 handled in ix86_function_arg. */
3112 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3113 return;
3115 bytes = arg.promoted_size_in_bytes ();
3116 words = CEIL (bytes, UNITS_PER_WORD);
3118 if (arg.type)
3119 mode = type_natural_mode (arg.type, NULL, false);
3121 if (TARGET_64BIT)
3123 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3125 if (call_abi == MS_ABI)
3126 nregs = function_arg_advance_ms_64 (cum, bytes, words);
3127 else
3128 nregs = function_arg_advance_64 (cum, mode, arg.type, words,
3129 arg.named);
3131 else
3132 nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
3134 if (!nregs)
3136 /* Track if there are outgoing arguments on stack. */
3137 if (cum->caller)
3138 cfun->machine->outgoing_args_on_stack = true;
3142 /* Define where to put the arguments to a function.
3143 Value is zero to push the argument on the stack,
3144 or a hard register in which to store the argument.
3146 MODE is the argument's machine mode.
3147 TYPE is the data type of the argument (as a tree).
3148 This is null for libcalls where that information may
3149 not be available.
3150 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3151 the preceding args and about the function being called.
3152 NAMED is nonzero if this argument is a named parameter
3153 (otherwise it is an extra parameter matching an ellipsis). */
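/* As a sketch of the 32-bit conventions handled below: with
       void __attribute__ ((fastcall)) f (int a, int b, int c);
   the first two integer arguments travel in %ecx and %edx and only `c'
   is pushed on the stack; plain regparm functions instead start
   allocating from %eax.  */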
3155 static rtx
3156 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
3157 machine_mode orig_mode, const_tree type,
3158 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3160 bool error_p = false;
3162 /* Avoid the AL settings for the Unix64 ABI. */
3163 if (mode == VOIDmode)
3164 return constm1_rtx;
3166 if (TARGET_IAMCU)
3168 /* Intel MCU psABI passes scalars and aggregates no larger than 8
3169 bytes in registers. */
3170 if (!VECTOR_MODE_P (mode) && bytes <= 8)
3171 goto pass_in_reg;
3172 return NULL_RTX;
3175 switch (mode)
3177 default:
3178 break;
3180 case E_BLKmode:
3181 if (bytes < 0)
3182 break;
3183 /* FALLTHRU */
3184 case E_DImode:
3185 case E_SImode:
3186 case E_HImode:
3187 case E_QImode:
3188 pass_in_reg:
3189 if (words <= cum->nregs)
3191 int regno = cum->regno;
3193 /* Fastcall allocates the first two DWORD (SImode) or
3194 smaller arguments to ECX and EDX if it isn't an
3195 aggregate type. */
3196 if (cum->fastcall)
3198 if (mode == BLKmode
3199 || mode == DImode
3200 || (type && AGGREGATE_TYPE_P (type)))
3201 break;
3203 /* ECX, not EAX, is the first allocated register. */
3204 if (regno == AX_REG)
3205 regno = CX_REG;
3207 return gen_rtx_REG (mode, regno);
3209 break;
3211 case E_DFmode:
3212 if (cum->float_in_sse == -1)
3213 error_p = true;
3214 if (cum->float_in_sse < 2)
3215 break;
3216 /* FALLTHRU */
3217 case E_SFmode:
3218 if (cum->float_in_sse == -1)
3219 error_p = true;
3220 if (cum->float_in_sse < 1)
3221 break;
3222 /* FALLTHRU */
3223 case E_TImode:
3224 /* In 32bit, we pass TImode in xmm registers. */
3225 case E_V16QImode:
3226 case E_V8HImode:
3227 case E_V4SImode:
3228 case E_V2DImode:
3229 case E_V8HFmode:
3230 case E_V8BFmode:
3231 case E_V4SFmode:
3232 case E_V2DFmode:
3233 if (!type || !AGGREGATE_TYPE_P (type))
3235 if (cum->sse_nregs)
3236 return gen_reg_or_parallel (mode, orig_mode,
3237 cum->sse_regno + FIRST_SSE_REG);
3239 break;
3241 case E_OImode:
3242 case E_XImode:
3243 /* OImode and XImode shouldn't be used directly. */
3244 gcc_unreachable ();
3246 case E_V64QImode:
3247 case E_V32HImode:
3248 case E_V16SImode:
3249 case E_V8DImode:
3250 case E_V32HFmode:
3251 case E_V32BFmode:
3252 case E_V16SFmode:
3253 case E_V8DFmode:
3254 case E_V16HFmode:
3255 case E_V16BFmode:
3256 case E_V8SFmode:
3257 case E_V8SImode:
3258 case E_V32QImode:
3259 case E_V16HImode:
3260 case E_V4DFmode:
3261 case E_V4DImode:
3262 if (!type || !AGGREGATE_TYPE_P (type))
3264 if (cum->sse_nregs)
3265 return gen_reg_or_parallel (mode, orig_mode,
3266 cum->sse_regno + FIRST_SSE_REG);
3268 break;
3270 case E_V8QImode:
3271 case E_V4HImode:
3272 case E_V4HFmode:
3273 case E_V4BFmode:
3274 case E_V2SImode:
3275 case E_V2SFmode:
3276 case E_V1TImode:
3277 case E_V1DImode:
3278 if (!type || !AGGREGATE_TYPE_P (type))
3280 if (cum->mmx_nregs)
3281 return gen_reg_or_parallel (mode, orig_mode,
3282 cum->mmx_regno + FIRST_MMX_REG);
3284 break;
3286 if (error_p)
3288 cum->float_in_sse = 0;
3289 error ("calling %qD with SSE calling convention without "
3290 "SSE/SSE2 enabled", cum->decl);
3291 sorry ("this is a GCC bug that can be worked around by adding "
3292 "attribute used to function called");
3295 return NULL_RTX;
3298 static rtx
3299 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3300 machine_mode orig_mode, const_tree type, bool named)
3302 /* Handle a hidden AL argument containing number of registers
3303 for varargs x86-64 functions. */
3304 if (mode == VOIDmode)
3305 return GEN_INT (cum->maybe_vaarg
3306 ? (cum->sse_nregs < 0
3307 ? X86_64_SSE_REGPARM_MAX
3308 : cum->sse_regno)
3309 : -1);
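/* E.g. for a call like
       printf ("%f", 3.14);
   the caller sets %al to 1 because one SSE register (%xmm0) carries the
   double argument; a varargs callee uses that bound when deciding which
   vector argument registers to spill.  */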
3311 switch (mode)
3313 default:
3314 break;
3316 case E_V16HFmode:
3317 case E_V16BFmode:
3318 case E_V8SFmode:
3319 case E_V8SImode:
3320 case E_V32QImode:
3321 case E_V16HImode:
3322 case E_V4DFmode:
3323 case E_V4DImode:
3324 case E_V32HFmode:
3325 case E_V32BFmode:
3326 case E_V16SFmode:
3327 case E_V16SImode:
3328 case E_V64QImode:
3329 case E_V32HImode:
3330 case E_V8DFmode:
3331 case E_V8DImode:
3332 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
3333 if (!named)
3334 return NULL;
3335 break;
3338 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3339 cum->sse_nregs,
3340 &x86_64_int_parameter_registers [cum->regno],
3341 cum->sse_regno);
3344 static rtx
3345 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3346 machine_mode orig_mode, bool named, const_tree type,
3347 HOST_WIDE_INT bytes)
3349 unsigned int regno;
3351 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
3352 We use the value -2 to specify that the current function call is MSABI. */
3353 if (mode == VOIDmode)
3354 return GEN_INT (-2);
3356 /* If we've run out of registers, it goes on the stack. */
3357 if (cum->nregs == 0)
3358 return NULL_RTX;
3360 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3362 /* Only floating point modes are passed in anything but integer regs. */
3363 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
3365 if (named)
3367 if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
3368 regno = cum->regno + FIRST_SSE_REG;
3370 else
3372 rtx t1, t2;
3374 /* Unnamed floating parameters are passed in both the
3375 SSE and integer registers. */
3376 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3377 t2 = gen_rtx_REG (mode, regno);
3378 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3379 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3380 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3383 /* Handle aggregate types passed in registers. */
3384 if (orig_mode == BLKmode)
3386 if (bytes > 0 && bytes <= 8)
3387 mode = (bytes > 4 ? DImode : SImode);
3388 if (mode == BLKmode)
3389 mode = DImode;
3392 return gen_reg_or_parallel (mode, orig_mode, regno);
3395 /* Return where to put the arguments to a function.
3396 Return zero to push the argument on the stack, or a hard register in which to store the argument.
3398 ARG describes the argument while CUM gives information about the
3399 preceding args and about the function being called. */
3401 static rtx
3402 ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
3404 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3405 machine_mode mode = arg.mode;
3406 HOST_WIDE_INT bytes, words;
3407 rtx reg;
3409 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3411 gcc_assert (arg.type != NULL_TREE);
3412 if (POINTER_TYPE_P (arg.type))
3414 /* This is the pointer argument. */
3415 gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
3416 /* It is at -WORD(AP) in the current frame in interrupt and
3417 exception handlers. */
3418 reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
3420 else
3422 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
3423 && TREE_CODE (arg.type) == INTEGER_TYPE
3424 && TYPE_MODE (arg.type) == word_mode);
3425 /* The error code is the word-mode integer argument at
3426 -2 * WORD(AP) in the current frame of the exception
3427 handler. */
3428 reg = gen_rtx_MEM (word_mode,
3429 plus_constant (Pmode,
3430 arg_pointer_rtx,
3431 -2 * UNITS_PER_WORD));
3433 return reg;
3436 bytes = arg.promoted_size_in_bytes ();
3437 words = CEIL (bytes, UNITS_PER_WORD);
3439 /* To simplify the code below, represent vector types with a vector mode
3440 even if MMX/SSE are not active. */
3441 if (arg.type && VECTOR_TYPE_P (arg.type))
3442 mode = type_natural_mode (arg.type, cum, false);
3444 if (TARGET_64BIT)
3446 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3448 if (call_abi == MS_ABI)
3449 reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
3450 arg.type, bytes);
3451 else
3452 reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
3454 else
3455 reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);
3457 /* Track if there are outgoing arguments on stack. */
3458 if (reg == NULL_RTX && cum->caller)
3459 cfun->machine->outgoing_args_on_stack = true;
3461 return reg;
3464 /* A C expression that indicates when an argument must be passed by
3465 reference. If nonzero for an argument, a copy of that argument is
3466 made in memory and a pointer to the argument is passed instead of
3467 the argument itself. The pointer is passed in whatever way is
3468 appropriate for passing a pointer to that type. */
3470 static bool
3471 ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
3473 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3475 if (TARGET_64BIT)
3477 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3479 /* See Windows x64 Software Convention. */
3480 if (call_abi == MS_ABI)
3482 HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
3484 if (tree type = arg.type)
3486 /* Arrays are passed by reference. */
3487 if (TREE_CODE (type) == ARRAY_TYPE)
3488 return true;
3490 if (RECORD_OR_UNION_TYPE_P (type))
3492 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3493 are passed by reference. */
3494 msize = int_size_in_bytes (type);
3498 /* __m128 is passed by reference. */
3499 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
3501 else if (arg.type && int_size_in_bytes (arg.type) == -1)
3502 return true;
3505 return false;
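/* For example, under this MS ABI rule a 24-byte struct or a __m128
   argument is passed by reference (only a pointer occupies the argument
   slot), while an 8-byte struct is still passed by value in a register.  */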
3508 /* Return true when TYPE should be 128bit aligned for 32bit argument
3509 passing ABI. XXX: This function is obsolete and is only used for
3510 checking psABI compatibility with previous versions of GCC. */
3512 static bool
3513 ix86_compat_aligned_value_p (const_tree type)
3515 machine_mode mode = TYPE_MODE (type);
3516 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
3517 || mode == TDmode
3518 || mode == TFmode
3519 || mode == TCmode)
3520 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3521 return true;
3522 if (TYPE_ALIGN (type) < 128)
3523 return false;
3525 if (AGGREGATE_TYPE_P (type))
3527 /* Walk the aggregates recursively. */
3528 switch (TREE_CODE (type))
3530 case RECORD_TYPE:
3531 case UNION_TYPE:
3532 case QUAL_UNION_TYPE:
3534 tree field;
3536 /* Walk all the structure fields. */
3537 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3539 if (TREE_CODE (field) == FIELD_DECL
3540 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
3541 return true;
3543 break;
3546 case ARRAY_TYPE:
3547 /* Just for use if some languages pass arrays by value. */
3548 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
3549 return true;
3550 break;
3552 default:
3553 gcc_unreachable ();
3556 return false;
3559 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3560 XXX: This function is obsolete and is only used for checking psABI
3561 compatibility with previous versions of GCC. */
3563 static unsigned int
3564 ix86_compat_function_arg_boundary (machine_mode mode,
3565 const_tree type, unsigned int align)
3567 /* In 32bit, only _Decimal128 and __float128 are aligned to their
3568 natural boundaries. */
3569 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
3571 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3572 make an exception for SSE modes since these require 128bit
3573 alignment.
3575 The handling here differs from field_alignment. ICC aligns MMX
3576 arguments to 4 byte boundaries, while structure fields are aligned
3577 to 8 byte boundaries. */
3578 if (!type)
3580 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
3581 align = PARM_BOUNDARY;
3583 else
3585 if (!ix86_compat_aligned_value_p (type))
3586 align = PARM_BOUNDARY;
3589 if (align > BIGGEST_ALIGNMENT)
3590 align = BIGGEST_ALIGNMENT;
3591 return align;
3594 /* Return true when TYPE should be 128bit aligned for 32bit argument
3595 passing ABI. */
3597 static bool
3598 ix86_contains_aligned_value_p (const_tree type)
3600 machine_mode mode = TYPE_MODE (type);
3602 if (mode == XFmode || mode == XCmode)
3603 return false;
3605 if (TYPE_ALIGN (type) < 128)
3606 return false;
3608 if (AGGREGATE_TYPE_P (type))
3610 /* Walk the aggregates recursively. */
3611 switch (TREE_CODE (type))
3613 case RECORD_TYPE:
3614 case UNION_TYPE:
3615 case QUAL_UNION_TYPE:
3617 tree field;
3619 /* Walk all the structure fields. */
3620 for (field = TYPE_FIELDS (type);
3621 field;
3622 field = DECL_CHAIN (field))
3624 if (TREE_CODE (field) == FIELD_DECL
3625 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
3626 return true;
3628 break;
3631 case ARRAY_TYPE:
3632 /* Just for use if some languages pass arrays by value. */
3633 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
3634 return true;
3635 break;
3637 default:
3638 gcc_unreachable ();
3641 else
3642 return TYPE_ALIGN (type) >= 128;
3644 return false;
3647 /* Gives the alignment boundary, in bits, of an argument with the
3648 specified mode and type. */
3650 static unsigned int
3651 ix86_function_arg_boundary (machine_mode mode, const_tree type)
3653 unsigned int align;
3654 if (type)
3656 /* Since the main variant type is used for the call, convert the type
3657 to its main variant. */
3658 type = TYPE_MAIN_VARIANT (type);
3659 align = TYPE_ALIGN (type);
3660 if (TYPE_EMPTY_P (type))
3661 return PARM_BOUNDARY;
3663 else
3664 align = GET_MODE_ALIGNMENT (mode);
3665 if (align < PARM_BOUNDARY)
3666 align = PARM_BOUNDARY;
3667 else
3669 static bool warned;
3670 unsigned int saved_align = align;
3672 if (!TARGET_64BIT)
3674 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
3675 if (!type)
3677 if (mode == XFmode || mode == XCmode)
3678 align = PARM_BOUNDARY;
3680 else if (!ix86_contains_aligned_value_p (type))
3681 align = PARM_BOUNDARY;
3683 if (align < 128)
3684 align = PARM_BOUNDARY;
3687 if (warn_psabi
3688 && !warned
3689 && align != ix86_compat_function_arg_boundary (mode, type,
3690 saved_align))
3692 warned = true;
3693 inform (input_location,
3694 "the ABI for passing parameters with %d-byte"
3695 " alignment has changed in GCC 4.6",
3696 align / BITS_PER_UNIT);
3700 return align;
3703 /* Return true if N is a possible register number of function value. */
3705 static bool
3706 ix86_function_value_regno_p (const unsigned int regno)
3708 switch (regno)
3710 case AX_REG:
3711 return true;
3712 case DX_REG:
3713 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
3714 case DI_REG:
3715 case SI_REG:
3716 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
3718 /* Complex values are returned in %st(0)/%st(1) pair. */
3719 case ST0_REG:
3720 case ST1_REG:
3721 /* TODO: The function should depend on current function ABI but
3722 builtins.cc would need updating then. Therefore we use the
3723 default ABI. */
3724 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3725 return false;
3726 return TARGET_FLOAT_RETURNS_IN_80387;
3728 /* Complex values are returned in %xmm0/%xmm1 pair. */
3729 case XMM0_REG:
3730 case XMM1_REG:
3731 return TARGET_SSE;
3733 case MM0_REG:
3734 if (TARGET_MACHO || TARGET_64BIT)
3735 return false;
3736 return TARGET_MMX;
3739 return false;
3742 /* Check whether the register REGNO should be zeroed on X86.
3743 When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
3744 together, no need to zero it again.
3745 When NEED_ZERO_MMX is true, MMX registers should be cleared. */
3747 static bool
3748 zero_call_used_regno_p (const unsigned int regno,
3749 bool all_sse_zeroed,
3750 bool need_zero_mmx)
3752 return GENERAL_REGNO_P (regno)
3753 || (!all_sse_zeroed && SSE_REGNO_P (regno))
3754 || MASK_REGNO_P (regno)
3755 || (need_zero_mmx && MMX_REGNO_P (regno));
3758 /* Return the machine_mode that is used to zero register REGNO. */
3760 static machine_mode
3761 zero_call_used_regno_mode (const unsigned int regno)
3763 /* NB: We only need to zero the lower 32 bits for integer registers
3764 and the lower 128 bits for vector registers since destinations are
3765 zero-extended to the full register width. */
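/* E.g. a general register is cleared with a 32-bit xor (which the
   hardware zero-extends to the full 64 bits), and an SSE register with
   a V4SFmode xor of its low 128 bits.  */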
3766 if (GENERAL_REGNO_P (regno))
3767 return SImode;
3768 else if (SSE_REGNO_P (regno))
3769 return V4SFmode;
3770 else if (MASK_REGNO_P (regno))
3771 return HImode;
3772 else if (MMX_REGNO_P (regno))
3773 return V2SImode;
3774 else
3775 gcc_unreachable ();
3778 /* Generate a rtx to zero all vector registers together if possible,
3779 otherwise, return NULL. */
3781 static rtx
3782 zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
3784 if (!TARGET_AVX)
3785 return NULL;
3787 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3788 if ((LEGACY_SSE_REGNO_P (regno)
3789 || (TARGET_64BIT
3790 && (REX_SSE_REGNO_P (regno)
3791 || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
3792 && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3793 return NULL;
3795 return gen_avx_vzeroall ();
3798 /* Generate insns to zero all st registers together.
3799 Return true when zeroing instructions are generated.
3800 Assume the number of st registers that are zeroed is num_of_st,
3801 we will emit the following sequence to zero them together:
3802 fldz; \
3803 fldz; \
3805 fldz; \
3806 fstp %%st(0); \
3807 fstp %%st(0); \
3809 fstp %%st(0);
3810 i.e., num_of_st fldz insns followed by num_of_st fstp insns to clear
3811 the stack and mark the stack slots empty.
3813 How to compute the num_of_st:
3814 There is no direct mapping from stack registers to hard register
3815 numbers. If one stack register needs to be cleared, we don't know
3816 where in the stack the value remains. So, if any stack register
3817 needs to be cleared, the whole stack should be cleared. However,
3818 x87 stack registers that hold the return value should be excluded.
3819 x87 returns in the top (two for complex values) register, so
3820 num_of_st should be 7/6 when x87 returns, otherwise it will be 8.
3821 Return the value of num_of_st. */
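/* E.g. when the function returns a `long double' in %st(0), num_of_st is
   7: seven fldz pushes fill the remaining stack slots with zeros and
   seven fstp %st(0) pops mark them empty again, leaving only the return
   value live.  */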
3824 static int
3825 zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
3828 /* If the FPU is disabled, no need to zero all st registers. */
3829 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3830 return 0;
3832 unsigned int num_of_st = 0;
3833 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3834 if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
3835 && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3837 num_of_st++;
3838 break;
3841 if (num_of_st == 0)
3842 return 0;
3844 bool return_with_x87 = false;
3845 return_with_x87 = (crtl->return_rtx
3846 && (STACK_REG_P (crtl->return_rtx)));
3848 bool complex_return = false;
3849 complex_return = (crtl->return_rtx
3850 && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));
3852 if (return_with_x87)
3853 if (complex_return)
3854 num_of_st = 6;
3855 else
3856 num_of_st = 7;
3857 else
3858 num_of_st = 8;
3860 rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
3861 for (unsigned int i = 0; i < num_of_st; i++)
3862 emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));
3864 for (unsigned int i = 0; i < num_of_st; i++)
3866 rtx insn;
3867 insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
3868 add_reg_note (insn, REG_DEAD, st_reg);
3870 return num_of_st;
3874 /* When the routine exits in MMX mode, if any ST register needs
3875 to be zeroed, we should clear all MMX registers except the
3876 RET_MMX_REGNO that holds the return value. */
3877 static bool
3878 zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
3879 unsigned int ret_mmx_regno)
3881 bool need_zero_all_mm = false;
3882 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3883 if (STACK_REGNO_P (regno)
3884 && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3886 need_zero_all_mm = true;
3887 break;
3890 if (!need_zero_all_mm)
3891 return false;
3893 machine_mode mode = V2SImode;
3894 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
3895 if (regno != ret_mmx_regno)
3897 rtx reg = gen_rtx_REG (mode, regno);
3898 emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
3900 return true;
3903 /* TARGET_ZERO_CALL_USED_REGS. */
3904 /* Generate a sequence of instructions that zero registers specified by
3905 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
3906 zeroed. */
3907 static HARD_REG_SET
3908 ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
3910 HARD_REG_SET zeroed_hardregs;
3911 bool all_sse_zeroed = false;
3912 int all_st_zeroed_num = 0;
3913 bool all_mm_zeroed = false;
3915 CLEAR_HARD_REG_SET (zeroed_hardregs);
3917 /* First, let's see whether we can zero all vector registers together. */
3918 rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
3919 if (zero_all_vec_insn)
3921 emit_insn (zero_all_vec_insn);
3922 all_sse_zeroed = true;
3925 /* MM and ST registers share the same register set; we should follow the
3926 following rules to clear them:
3927              | MMX exit mode        | x87 exit mode
3928 -------------|----------------------|---------------
3929 uses x87 reg | clear all MMX        | clear all x87
3930 uses MMX reg | clear individual MMX | clear all x87
3931 x87 + MMX    | clear all MMX        | clear all x87
3933 First, we should decide which mode (MMX mode or x87 mode) the function
3934 exits with. */
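/* For instance, a function that exits in x87 mode and has any x87 or MMX
   register in NEED_ZEROED_HARDREGS clears the whole x87 stack, whereas a
   function returning a value in %mm0 clears the MMX registers while
   leaving the live return register alone.  */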
3936 bool exit_with_mmx_mode = (crtl->return_rtx
3937 && (MMX_REG_P (crtl->return_rtx)));
3939 if (!exit_with_mmx_mode)
3940 /* x87 exit mode, we should zero all st registers together. */
3942 all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs);
3944 if (all_st_zeroed_num > 0)
3945 for (unsigned int regno = FIRST_STACK_REG; regno <= LAST_STACK_REG; regno++)
3946 /* x87 stack registers that hold the return value should be excluded.
3947 x87 returns in the top (two for complex values) register. */
3948 if (all_st_zeroed_num == 8
3949 || !((all_st_zeroed_num >= 6 && regno == REGNO (crtl->return_rtx))
3950 || (all_st_zeroed_num == 6
3951 && (regno == (REGNO (crtl->return_rtx) + 1)))))
3952 SET_HARD_REG_BIT (zeroed_hardregs, regno);
3954 else
3955 /* MMX exit mode, check whether we can zero all mm registers. */
3957 unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
3958 all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
3959 exit_mmx_regno);
3960 if (all_mm_zeroed)
3961 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
3962 if (regno != exit_mmx_regno)
3963 SET_HARD_REG_BIT (zeroed_hardregs, regno);
3966 /* Now, generate instructions to zero all the other registers. */
3968 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3970 if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3971 continue;
3972 if (!zero_call_used_regno_p (regno, all_sse_zeroed,
3973 exit_with_mmx_mode && !all_mm_zeroed))
3974 continue;
3976 SET_HARD_REG_BIT (zeroed_hardregs, regno);
3978 machine_mode mode = zero_call_used_regno_mode (regno);
3980 rtx reg = gen_rtx_REG (mode, regno);
3981 rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));
3983 switch (mode)
3985 case E_SImode:
3986 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
3988 rtx clob = gen_rtx_CLOBBER (VOIDmode,
3989 gen_rtx_REG (CCmode,
3990 FLAGS_REG));
3991 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
3992 tmp,
3993 clob));
3995 /* FALLTHRU. */
3997 case E_V4SFmode:
3998 case E_HImode:
3999 case E_V2SImode:
4000 emit_insn (tmp);
4001 break;
4003 default:
4004 gcc_unreachable ();
4007 return zeroed_hardregs;
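/* For orientation only: this hook backs -fzero-call-used-regs= and the
   corresponding function attribute.  A hedged source-level example (the
   function name is illustrative, not from the testsuite):

	int __attribute__ ((zero_call_used_regs ("used-gpr")))
	leak_free (int x)
	{
	  return x;
	}

   is expected to clear the call-used general registers the function
   actually touched before its ret, using the xor/mov sequences selected
   above.  */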
4010 /* Define how to find the value returned by a function.
4011 VALTYPE is the data type of the value (as a tree).
4012 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4013 otherwise, FUNC is 0. */
4015 static rtx
4016 function_value_32 (machine_mode orig_mode, machine_mode mode,
4017 const_tree fntype, const_tree fn)
4019 unsigned int regno;
4021 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4022 we normally prevent this case when mmx is not available. However
4023 some ABIs may require the result to be returned like DImode. */
4024 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4025 regno = FIRST_MMX_REG;
4027 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4028 we prevent this case when sse is not available. However some ABIs
4029 may require the result to be returned like integer TImode. */
4030 else if (mode == TImode
4031 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4032 regno = FIRST_SSE_REG;
4034 /* 32-byte vector modes in %ymm0. */
4035 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
4036 regno = FIRST_SSE_REG;
4038 /* 64-byte vector modes in %zmm0. */
4039 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
4040 regno = FIRST_SSE_REG;
4042 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4043 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4044 regno = FIRST_FLOAT_REG;
4045 else
4046 /* Most things go in %eax. */
4047 regno = AX_REG;
4049 /* Return __bf16/_Float16/_Complex _Float16 in an SSE register. */
4050 if (mode == HFmode || mode == BFmode)
4052 if (!TARGET_SSE2)
4054 error ("SSE register return with SSE2 disabled");
4055 regno = AX_REG;
4057 else
4058 regno = FIRST_SSE_REG;
4061 if (mode == HCmode)
4063 if (!TARGET_SSE2)
4064 error ("SSE register return with SSE2 disabled");
4066 rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
4067 XVECEXP (ret, 0, 0)
4068 = gen_rtx_EXPR_LIST (VOIDmode,
4069 gen_rtx_REG (SImode,
4070 TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
4071 GEN_INT (0));
4072 return ret;
4075 /* Override FP return register with %xmm0 for local functions when
4076 SSE math is enabled or for functions with sseregparm attribute. */
4077 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4079 int sse_level = ix86_function_sseregparm (fntype, fn, false);
4080 if (sse_level == -1)
4082 error ("calling %qD with SSE calling convention without "
4083 "SSE/SSE2 enabled", fn);
4084 sorry ("this is a GCC bug that can be worked around by adding "
4085 "attribute %<used%> to the called function");
4087 else if ((sse_level >= 1 && mode == SFmode)
4088 || (sse_level == 2 && mode == DFmode))
4089 regno = FIRST_SSE_REG;
4092 /* OImode shouldn't be used directly. */
4093 gcc_assert (mode != OImode);
4095 return gen_rtx_REG (orig_mode, regno);
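/* A hedged summary of the common 32-bit cases handled above (defaults, no
   special attributes): int and pointer values come back in %eax; float,
   double and long double in %st(0); _Float16/__bf16 in %xmm0 (requiring
   SSE2); __m64 in %mm0 and __m128 in %xmm0.  The sseregparm/SSE-math
   override handled above can further redirect SFmode/DFmode values to
   %xmm0.  */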
4098 static rtx
4099 function_value_64 (machine_mode orig_mode, machine_mode mode,
4100 const_tree valtype)
4102 rtx ret;
4104 /* Handle libcalls, which don't provide a type node. */
4105 if (valtype == NULL)
4107 unsigned int regno;
4109 switch (mode)
4111 case E_BFmode:
4112 case E_HFmode:
4113 case E_HCmode:
4114 case E_SFmode:
4115 case E_SCmode:
4116 case E_DFmode:
4117 case E_DCmode:
4118 case E_TFmode:
4119 case E_SDmode:
4120 case E_DDmode:
4121 case E_TDmode:
4122 regno = FIRST_SSE_REG;
4123 break;
4124 case E_XFmode:
4125 case E_XCmode:
4126 regno = FIRST_FLOAT_REG;
4127 break;
4128 case E_TCmode:
4129 return NULL;
4130 default:
4131 regno = AX_REG;
4134 return gen_rtx_REG (mode, regno);
4136 else if (POINTER_TYPE_P (valtype))
4138 /* Pointers are always returned in word_mode. */
4139 mode = word_mode;
4142 ret = construct_container (mode, orig_mode, valtype, 1,
4143 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
4144 x86_64_int_return_registers, 0);
4146 /* For zero-sized structures, construct_container returns NULL, but we
4147 need to keep the rest of the compiler happy by returning a meaningful value. */
4148 if (!ret)
4149 ret = gen_rtx_REG (orig_mode, AX_REG);
4151 return ret;
4154 static rtx
4155 function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
4156 const_tree fntype, const_tree fn, const_tree valtype)
4158 unsigned int regno;
4160 /* Floating point return values in %st(0)
4161 (unless -mno-fp-ret-in-387 is used or the value is an aggregate type of up to 8 bytes). */
4162 if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
4163 && (GET_MODE_SIZE (mode) > 8
4164 || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
4166 regno = FIRST_FLOAT_REG;
4167 return gen_rtx_REG (orig_mode, regno);
4169 else
4170 return function_value_32 (orig_mode, mode, fntype, fn);
4173 static rtx
4174 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
4175 const_tree valtype)
4177 unsigned int regno = AX_REG;
4179 if (TARGET_SSE)
4181 switch (GET_MODE_SIZE (mode))
4183 case 16:
4184 if (valtype != NULL_TREE
4185 && !VECTOR_INTEGER_TYPE_P (valtype)
4187 && !INTEGRAL_TYPE_P (valtype)
4188 && !VECTOR_FLOAT_TYPE_P (valtype))
4189 break;
4190 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4191 && !COMPLEX_MODE_P (mode))
4192 regno = FIRST_SSE_REG;
4193 break;
4194 case 8:
4195 case 4:
4196 if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
4197 break;
4198 if (mode == SFmode || mode == DFmode)
4199 regno = FIRST_SSE_REG;
4200 break;
4201 default:
4202 break;
4205 return gen_rtx_REG (orig_mode, regno);
4208 static rtx
4209 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4210 machine_mode orig_mode, machine_mode mode)
4212 const_tree fn, fntype;
4214 fn = NULL_TREE;
4215 if (fntype_or_decl && DECL_P (fntype_or_decl))
4216 fn = fntype_or_decl;
4217 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4219 if (ix86_function_type_abi (fntype) == MS_ABI)
4221 if (TARGET_64BIT)
4222 return function_value_ms_64 (orig_mode, mode, valtype);
4223 else
4224 return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
4226 else if (TARGET_64BIT)
4227 return function_value_64 (orig_mode, mode, valtype);
4228 else
4229 return function_value_32 (orig_mode, mode, fntype, fn);
4232 static rtx
4233 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
4235 machine_mode mode, orig_mode;
4237 orig_mode = TYPE_MODE (valtype);
4238 mode = type_natural_mode (valtype, NULL, true);
4239 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4242 /* Pointer function arguments and return values are promoted to
4243 word_mode for normal functions. */
4245 static machine_mode
4246 ix86_promote_function_mode (const_tree type, machine_mode mode,
4247 int *punsignedp, const_tree fntype,
4248 int for_return)
4250 if (cfun->machine->func_type == TYPE_NORMAL
4251 && type != NULL_TREE
4252 && POINTER_TYPE_P (type))
4254 *punsignedp = POINTERS_EXTEND_UNSIGNED;
4255 return word_mode;
4257 return default_promote_function_mode (type, mode, punsignedp, fntype,
4258 for_return);
4261 /* Return true if a structure, union or array with MODE containing FIELD
4262 should be accessed using BLKmode. */
4264 static bool
4265 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
4267 /* Union with XFmode must be in BLKmode. */
4268 return (mode == XFmode
4269 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
4270 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
4274 ix86_libcall_value (machine_mode mode)
4276 return ix86_function_value_1 (NULL, NULL, mode, mode);
4279 /* Return true iff type is returned in memory. */
4281 static bool
4282 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4284 const machine_mode mode = type_natural_mode (type, NULL, true);
4285 HOST_WIDE_INT size;
4287 if (TARGET_64BIT)
4289 if (ix86_function_type_abi (fntype) == MS_ABI)
4291 size = int_size_in_bytes (type);
4293 /* __m128 is returned in xmm0. */
4294 if ((!type || VECTOR_INTEGER_TYPE_P (type)
4295 || INTEGRAL_TYPE_P (type)
4296 || VECTOR_FLOAT_TYPE_P (type))
4297 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4298 && !COMPLEX_MODE_P (mode)
4299 && (GET_MODE_SIZE (mode) == 16 || size == 16))
4300 return false;
4302 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
4303 return size != 1 && size != 2 && size != 4 && size != 8;
4305 else
4307 int needed_intregs, needed_sseregs;
4309 return examine_argument (mode, type, 1,
4310 &needed_intregs, &needed_sseregs);
4313 else
4315 size = int_size_in_bytes (type);
4317 /* Intel MCU psABI returns scalars and aggregates no larger than 8
4318 bytes in registers. */
4319 if (TARGET_IAMCU)
4320 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
4322 if (mode == BLKmode)
4323 return true;
4325 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4326 return false;
4328 if (VECTOR_MODE_P (mode) || mode == TImode)
4330 /* User-created vectors small enough to fit in EAX. */
4331 if (size < 8)
4332 return false;
4334 /* Unless the ABI prescribes otherwise,
4335 MMX/3dNow values are returned in MM0 if available. */
4337 if (size == 8)
4338 return TARGET_VECT8_RETURNS || !TARGET_MMX;
4340 /* SSE values are returned in XMM0 if available. */
4341 if (size == 16)
4342 return !TARGET_SSE;
4344 /* AVX values are returned in YMM0 if available. */
4345 if (size == 32)
4346 return !TARGET_AVX;
4348 /* AVX512F values are returned in ZMM0 if available. */
4349 if (size == 64)
4350 return !TARGET_AVX512F;
4353 if (mode == XFmode)
4354 return false;
4356 if (size > 12)
4357 return true;
4359 /* OImode shouldn't be used directly. */
4360 gcc_assert (mode != OImode);
4362 return false;
4366 /* Implement TARGET_PUSH_ARGUMENT. */
4368 static bool
4369 ix86_push_argument (unsigned int npush)
4371 /* If SSE2 is available, use a vector move to put large arguments onto the
4372 stack. NB: In 32-bit mode, use an 8-byte vector move. */
4373 return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8))
4374 && TARGET_PUSH_ARGS
4375 && !ACCUMULATE_OUTGOING_ARGS);
4379 /* Create the va_list data type. */
4381 static tree
4382 ix86_build_builtin_va_list_64 (void)
4384 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4386 record = lang_hooks.types.make_type (RECORD_TYPE);
4387 type_decl = build_decl (BUILTINS_LOCATION,
4388 TYPE_DECL, get_identifier ("__va_list_tag"), record);
4390 f_gpr = build_decl (BUILTINS_LOCATION,
4391 FIELD_DECL, get_identifier ("gp_offset"),
4392 unsigned_type_node);
4393 f_fpr = build_decl (BUILTINS_LOCATION,
4394 FIELD_DECL, get_identifier ("fp_offset"),
4395 unsigned_type_node);
4396 f_ovf = build_decl (BUILTINS_LOCATION,
4397 FIELD_DECL, get_identifier ("overflow_arg_area"),
4398 ptr_type_node);
4399 f_sav = build_decl (BUILTINS_LOCATION,
4400 FIELD_DECL, get_identifier ("reg_save_area"),
4401 ptr_type_node);
4403 va_list_gpr_counter_field = f_gpr;
4404 va_list_fpr_counter_field = f_fpr;
4406 DECL_FIELD_CONTEXT (f_gpr) = record;
4407 DECL_FIELD_CONTEXT (f_fpr) = record;
4408 DECL_FIELD_CONTEXT (f_ovf) = record;
4409 DECL_FIELD_CONTEXT (f_sav) = record;
4411 TYPE_STUB_DECL (record) = type_decl;
4412 TYPE_NAME (record) = type_decl;
4413 TYPE_FIELDS (record) = f_gpr;
4414 DECL_CHAIN (f_gpr) = f_fpr;
4415 DECL_CHAIN (f_fpr) = f_ovf;
4416 DECL_CHAIN (f_ovf) = f_sav;
4418 layout_type (record);
4420 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
4421 NULL_TREE, TYPE_ATTRIBUTES (record));
4423 /* The correct type is an array type of one element. */
4424 return build_array_type (record, build_index_type (size_zero_node));
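/* For orientation, the record built above corresponds to the familiar
   SysV x86-64 va_list layout; in C terms it is roughly (a sketch only,
   not a definition the compiler consumes):

	typedef struct __va_list_tag
	{
	  unsigned int gp_offset;
	  unsigned int fp_offset;
	  void *overflow_arg_area;
	  void *reg_save_area;
	} __builtin_va_list[1];
*/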
4427 /* Set up the builtin va_list data type and, for 64-bit, the additional
4428 calling-convention-specific va_list data types. */
4430 static tree
4431 ix86_build_builtin_va_list (void)
4433 if (TARGET_64BIT)
4435 /* Initialize ABI specific va_list builtin types.
4437 In lto1, we can encounter two va_list types:
4438 - one as a result of the type-merge across TUs, and
4439 - the one constructed here.
4440 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
4441 a type identity check in canonical_va_list_type based on
4442 TYPE_MAIN_VARIANT (which we used to have) will not work.
4443 Instead, we tag each va_list_type_node with its unique attribute, and
4444 look for the attribute in the type identity check in
4445 canonical_va_list_type.
4447 Tagging sysv_va_list_type_node directly with the attribute is
4448 problematic since it's an array of one record, which will decay into a
4449 pointer to the record when used as a parameter (see the build_va_arg
4450 comments for an example), dropping the attribute in the process. So we
4451 tag the record instead. */
4453 /* For SYSV_ABI we use an array of one record. */
4454 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
4456 /* For MS_ABI we use plain pointer to argument area. */
4457 tree char_ptr_type = build_pointer_type (char_type_node);
4458 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
4459 TYPE_ATTRIBUTES (char_ptr_type));
4460 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
4462 return ((ix86_abi == MS_ABI)
4463 ? ms_va_list_type_node
4464 : sysv_va_list_type_node);
4466 else
4468 /* For i386 we use plain pointer to argument area. */
4469 return build_pointer_type (char_type_node);
4473 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4475 static void
4476 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4478 rtx save_area, mem;
4479 alias_set_type set;
4480 int i, max;
4482 /* GPR size of varargs save area. */
4483 if (cfun->va_list_gpr_size)
4484 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
4485 else
4486 ix86_varargs_gpr_size = 0;
4488 /* FPR size of varargs save area. We don't need it if we don't pass
4489 anything in SSE registers. */
4490 if (TARGET_SSE && cfun->va_list_fpr_size)
4491 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
4492 else
4493 ix86_varargs_fpr_size = 0;
4495 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
4496 return;
4498 save_area = frame_pointer_rtx;
4499 set = get_varargs_alias_set ();
4501 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4502 if (max > X86_64_REGPARM_MAX)
4503 max = X86_64_REGPARM_MAX;
4505 for (i = cum->regno; i < max; i++)
4507 mem = gen_rtx_MEM (word_mode,
4508 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
4509 MEM_NOTRAP_P (mem) = 1;
4510 set_mem_alias_set (mem, set);
4511 emit_move_insn (mem,
4512 gen_rtx_REG (word_mode,
4513 x86_64_int_parameter_registers[i]));
4516 if (ix86_varargs_fpr_size)
4518 machine_mode smode;
4519 rtx_code_label *label;
4520 rtx test;
4522 /* Now emit code to save SSE registers. The AX parameter contains the number
4523 of SSE parameter registers used to call this function, though all we
4524 actually check here is the zero/non-zero status. */
4526 label = gen_label_rtx ();
4527 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
4528 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
4529 label));
4531 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4532 we used movdqa (i.e. TImode) instead? Perhaps even better would
4533 be if we could determine the real mode of the data, via a hook
4534 into pass_stdarg. Ignore all that for now. */
4535 smode = V4SFmode;
4536 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
4537 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
4539 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
4540 if (max > X86_64_SSE_REGPARM_MAX)
4541 max = X86_64_SSE_REGPARM_MAX;
4543 for (i = cum->sse_regno; i < max; ++i)
4545 mem = plus_constant (Pmode, save_area,
4546 i * 16 + ix86_varargs_gpr_size);
4547 mem = gen_rtx_MEM (smode, mem);
4548 MEM_NOTRAP_P (mem) = 1;
4549 set_mem_alias_set (mem, set);
4550 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
4552 emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
4555 emit_label (label);
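/* Save-area layout produced above, as a sketch (offsets from the start of
   the register save area; X86_64_REGPARM_MAX == 6,
   X86_64_SSE_REGPARM_MAX == 8):

	  0 ..  47   %rdi %rsi %rdx %rcx %r8 %r9    (6 * 8 bytes)
	 48 .. 175   %xmm0 .. %xmm7                 (8 * 16 bytes)

   The 8 * X86_64_REGPARM_MAX bias applied to fp_offset in ix86_va_start
   below points va_arg at the SSE part of this area.  */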
4559 static void
4560 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4562 alias_set_type set = get_varargs_alias_set ();
4563 int i;
4565 /* Reset to zero, as a sysv va_arg might have been used
4566 before. */
4567 ix86_varargs_gpr_size = 0;
4568 ix86_varargs_fpr_size = 0;
4570 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4572 rtx reg, mem;
4574 mem = gen_rtx_MEM (Pmode,
4575 plus_constant (Pmode, virtual_incoming_args_rtx,
4576 i * UNITS_PER_WORD));
4577 MEM_NOTRAP_P (mem) = 1;
4578 set_mem_alias_set (mem, set);
4580 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4581 emit_move_insn (mem, reg);
4585 static void
4586 ix86_setup_incoming_varargs (cumulative_args_t cum_v,
4587 const function_arg_info &arg,
4588 int *, int no_rtl)
4590 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4591 CUMULATIVE_ARGS next_cum;
4592 tree fntype;
4594 /* This argument doesn't appear to be used anymore, which is good,
4595 because the old code here didn't suppress rtl generation. */
4596 gcc_assert (!no_rtl);
4598 if (!TARGET_64BIT)
4599 return;
4601 fntype = TREE_TYPE (current_function_decl);
4603 /* For varargs, we do not want to skip the dummy va_dcl argument.
4604 For stdargs, we do want to skip the last named argument. */
4605 next_cum = *cum;
4606 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
4607 && stdarg_p (fntype))
4608 ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4610 if (cum->call_abi == MS_ABI)
4611 setup_incoming_varargs_ms_64 (&next_cum);
4612 else
4613 setup_incoming_varargs_64 (&next_cum);
4616 /* Check whether TYPE is a char * kind of va_list. */
4618 static bool
4619 is_va_list_char_pointer (tree type)
4621 tree canonic;
4623 /* For 32-bit it is always true. */
4624 if (!TARGET_64BIT)
4625 return true;
4626 canonic = ix86_canonical_va_list_type (type);
4627 return (canonic == ms_va_list_type_node
4628 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4631 /* Implement va_start. */
4633 static void
4634 ix86_va_start (tree valist, rtx nextarg)
4636 HOST_WIDE_INT words, n_gpr, n_fpr;
4637 tree f_gpr, f_fpr, f_ovf, f_sav;
4638 tree gpr, fpr, ovf, sav, t;
4639 tree type;
4640 rtx ovf_rtx;
4642 if (flag_split_stack
4643 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4645 unsigned int scratch_regno;
4647 /* When we are splitting the stack, we can't refer to the stack
4648 arguments using internal_arg_pointer, because they may be on
4649 the old stack. The split stack prologue will arrange to
4650 leave a pointer to the old stack arguments in a scratch
4651 register, which we here copy to a pseudo-register. The split
4652 stack prologue can't set the pseudo-register directly because
4653 it (the prologue) runs before any registers have been saved. */
4655 scratch_regno = split_stack_prologue_scratch_regno ();
4656 if (scratch_regno != INVALID_REGNUM)
4658 rtx reg;
4659 rtx_insn *seq;
4661 reg = gen_reg_rtx (Pmode);
4662 cfun->machine->split_stack_varargs_pointer = reg;
4664 start_sequence ();
4665 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
4666 seq = get_insns ();
4667 end_sequence ();
4669 push_topmost_sequence ();
4670 emit_insn_after (seq, entry_of_function ());
4671 pop_topmost_sequence ();
4675 /* Only 64bit target needs something special. */
4676 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4678 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4679 std_expand_builtin_va_start (valist, nextarg);
4680 else
4682 rtx va_r, next;
4684 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
4685 next = expand_binop (ptr_mode, add_optab,
4686 cfun->machine->split_stack_varargs_pointer,
4687 crtl->args.arg_offset_rtx,
4688 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4689 convert_move (va_r, next, 0);
4691 return;
4694 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4695 f_fpr = DECL_CHAIN (f_gpr);
4696 f_ovf = DECL_CHAIN (f_fpr);
4697 f_sav = DECL_CHAIN (f_ovf);
4699 valist = build_simple_mem_ref (valist);
4700 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
4701 /* The following should be folded into the MEM_REF offset. */
4702 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
4703 f_gpr, NULL_TREE);
4704 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
4705 f_fpr, NULL_TREE);
4706 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
4707 f_ovf, NULL_TREE);
4708 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
4709 f_sav, NULL_TREE);
4711 /* Count number of gp and fp argument registers used. */
4712 words = crtl->args.info.words;
4713 n_gpr = crtl->args.info.regno;
4714 n_fpr = crtl->args.info.sse_regno;
4716 if (cfun->va_list_gpr_size)
4718 type = TREE_TYPE (gpr);
4719 t = build2 (MODIFY_EXPR, type,
4720 gpr, build_int_cst (type, n_gpr * 8));
4721 TREE_SIDE_EFFECTS (t) = 1;
4722 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4725 if (TARGET_SSE && cfun->va_list_fpr_size)
4727 type = TREE_TYPE (fpr);
4728 t = build2 (MODIFY_EXPR, type, fpr,
4729 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
4730 TREE_SIDE_EFFECTS (t) = 1;
4731 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4734 /* Find the overflow area. */
4735 type = TREE_TYPE (ovf);
4736 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4737 ovf_rtx = crtl->args.internal_arg_pointer;
4738 else
4739 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
4740 t = make_tree (type, ovf_rtx);
4741 if (words != 0)
4742 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
4744 t = build2 (MODIFY_EXPR, type, ovf, t);
4745 TREE_SIDE_EFFECTS (t) = 1;
4746 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4748 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
4750 /* Find the register save area.
4751 The function prologue saves it right above the stack frame. */
4752 type = TREE_TYPE (sav);
4753 t = make_tree (type, frame_pointer_rtx);
4754 if (!ix86_varargs_gpr_size)
4755 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
4757 t = build2 (MODIFY_EXPR, type, sav, t);
4758 TREE_SIDE_EFFECTS (t) = 1;
4759 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
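/* Hedged worked example for the code above: for

	int f (int a, double b, ...);

   one named GPR and one named SSE register are consumed, so va_start
   initializes gp_offset = 1 * 8 = 8 and fp_offset = 1 * 16 + 48 = 64,
   overflow_arg_area points at the first stack-passed argument and
   reg_save_area at the area filled in by setup_incoming_varargs_64.  */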
4763 /* Implement va_arg. */
4765 static tree
4766 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4767 gimple_seq *post_p)
4769 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4770 tree f_gpr, f_fpr, f_ovf, f_sav;
4771 tree gpr, fpr, ovf, sav, t;
4772 int size, rsize;
4773 tree lab_false, lab_over = NULL_TREE;
4774 tree addr, t2;
4775 rtx container;
4776 int indirect_p = 0;
4777 tree ptrtype;
4778 machine_mode nat_mode;
4779 unsigned int arg_boundary;
4780 unsigned int type_align;
4782 /* Only 64bit target needs something special. */
4783 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4784 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4786 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4787 f_fpr = DECL_CHAIN (f_gpr);
4788 f_ovf = DECL_CHAIN (f_fpr);
4789 f_sav = DECL_CHAIN (f_ovf);
4791 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
4792 valist, f_gpr, NULL_TREE);
4794 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4795 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4796 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4798 indirect_p = pass_va_arg_by_reference (type);
4799 if (indirect_p)
4800 type = build_pointer_type (type);
4801 size = arg_int_size_in_bytes (type);
4802 rsize = CEIL (size, UNITS_PER_WORD);
4804 nat_mode = type_natural_mode (type, NULL, false);
4805 switch (nat_mode)
4807 case E_V16HFmode:
4808 case E_V16BFmode:
4809 case E_V8SFmode:
4810 case E_V8SImode:
4811 case E_V32QImode:
4812 case E_V16HImode:
4813 case E_V4DFmode:
4814 case E_V4DImode:
4815 case E_V32HFmode:
4816 case E_V32BFmode:
4817 case E_V16SFmode:
4818 case E_V16SImode:
4819 case E_V64QImode:
4820 case E_V32HImode:
4821 case E_V8DFmode:
4822 case E_V8DImode:
4823 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
4824 if (!TARGET_64BIT_MS_ABI)
4826 container = NULL;
4827 break;
4829 /* FALLTHRU */
4831 default:
4832 container = construct_container (nat_mode, TYPE_MODE (type),
4833 type, 0, X86_64_REGPARM_MAX,
4834 X86_64_SSE_REGPARM_MAX, intreg,
4836 break;
4839 /* Pull the value out of the saved registers. */
4841 addr = create_tmp_var (ptr_type_node, "addr");
4842 type_align = TYPE_ALIGN (type);
4844 if (container)
4846 int needed_intregs, needed_sseregs;
4847 bool need_temp;
4848 tree int_addr, sse_addr;
4850 lab_false = create_artificial_label (UNKNOWN_LOCATION);
4851 lab_over = create_artificial_label (UNKNOWN_LOCATION);
4853 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4855 need_temp = (!REG_P (container)
4856 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4857 || TYPE_ALIGN (type) > 128));
4859 /* In case we are passing a structure, verify that it forms a consecutive
4860 block in the register save area. If not, we need to do moves. */
4861 if (!need_temp && !REG_P (container))
4863 /* Verify that all registers are strictly consecutive. */
4864 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4866 int i;
4868 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4870 rtx slot = XVECEXP (container, 0, i);
4871 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4872 || INTVAL (XEXP (slot, 1)) != i * 16)
4873 need_temp = true;
4876 else
4878 int i;
4880 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4882 rtx slot = XVECEXP (container, 0, i);
4883 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4884 || INTVAL (XEXP (slot, 1)) != i * 8)
4885 need_temp = true;
4889 if (!need_temp)
4891 int_addr = addr;
4892 sse_addr = addr;
4894 else
4896 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4897 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4900 /* First ensure that we fit completely in registers. */
4901 if (needed_intregs)
4903 t = build_int_cst (TREE_TYPE (gpr),
4904 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
4905 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4906 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4907 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4908 gimplify_and_add (t, pre_p);
4910 if (needed_sseregs)
4912 t = build_int_cst (TREE_TYPE (fpr),
4913 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4914 + X86_64_REGPARM_MAX * 8);
4915 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4916 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4917 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4918 gimplify_and_add (t, pre_p);
4921 /* Compute index to start of area used for integer regs. */
4922 if (needed_intregs)
4924 /* int_addr = gpr + sav; */
4925 t = fold_build_pointer_plus (sav, gpr);
4926 gimplify_assign (int_addr, t, pre_p);
4928 if (needed_sseregs)
4930 /* sse_addr = fpr + sav; */
4931 t = fold_build_pointer_plus (sav, fpr);
4932 gimplify_assign (sse_addr, t, pre_p);
4934 if (need_temp)
4936 int i, prev_size = 0;
4937 tree temp = create_tmp_var (type, "va_arg_tmp");
4938 TREE_ADDRESSABLE (temp) = 1;
4940 /* addr = &temp; */
4941 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4942 gimplify_assign (addr, t, pre_p);
4944 for (i = 0; i < XVECLEN (container, 0); i++)
4946 rtx slot = XVECEXP (container, 0, i);
4947 rtx reg = XEXP (slot, 0);
4948 machine_mode mode = GET_MODE (reg);
4949 tree piece_type;
4950 tree addr_type;
4951 tree daddr_type;
4952 tree src_addr, src;
4953 int src_offset;
4954 tree dest_addr, dest;
4955 int cur_size = GET_MODE_SIZE (mode);
4957 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
4958 prev_size = INTVAL (XEXP (slot, 1));
4959 if (prev_size + cur_size > size)
4961 cur_size = size - prev_size;
4962 unsigned int nbits = cur_size * BITS_PER_UNIT;
4963 if (!int_mode_for_size (nbits, 1).exists (&mode))
4964 mode = QImode;
4966 piece_type = lang_hooks.types.type_for_mode (mode, 1);
4967 if (mode == GET_MODE (reg))
4968 addr_type = build_pointer_type (piece_type);
4969 else
4970 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4971 true);
4972 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4973 true);
4975 if (SSE_REGNO_P (REGNO (reg)))
4977 src_addr = sse_addr;
4978 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4980 else
4982 src_addr = int_addr;
4983 src_offset = REGNO (reg) * 8;
4985 src_addr = fold_convert (addr_type, src_addr);
4986 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
4988 dest_addr = fold_convert (daddr_type, addr);
4989 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
4990 if (cur_size == GET_MODE_SIZE (mode))
4992 src = build_va_arg_indirect_ref (src_addr);
4993 dest = build_va_arg_indirect_ref (dest_addr);
4995 gimplify_assign (dest, src, pre_p);
4997 else
4999 tree copy
5000 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
5001 3, dest_addr, src_addr,
5002 size_int (cur_size));
5003 gimplify_and_add (copy, pre_p);
5005 prev_size += cur_size;
5009 if (needed_intregs)
5011 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5012 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5013 gimplify_assign (gpr, t, pre_p);
5014 /* The GPR save area guarantees only 8-byte alignment. */
5015 if (!need_temp)
5016 type_align = MIN (type_align, 64);
5019 if (needed_sseregs)
5021 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5022 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5023 gimplify_assign (unshare_expr (fpr), t, pre_p);
5026 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
5028 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
5031 /* ... otherwise out of the overflow area. */
5033 /* When the caller aligns a parameter on the stack, a parameter whose
5034 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT is only aligned
5035 at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee here
5036 with the caller. */
5037 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
5038 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
5039 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
5041 /* Care for on-stack alignment if needed. */
5042 if (arg_boundary <= 64 || size == 0)
5043 t = ovf;
5044 else
5046 HOST_WIDE_INT align = arg_boundary / 8;
5047 t = fold_build_pointer_plus_hwi (ovf, align - 1);
5048 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5049 build_int_cst (TREE_TYPE (t), -align));
5052 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5053 gimplify_assign (addr, t, pre_p);
5055 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
5056 gimplify_assign (unshare_expr (ovf), t, pre_p);
5058 if (container)
5059 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
5061 type = build_aligned_type (type, type_align);
5062 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
5063 addr = fold_convert (ptrtype, addr);
5065 if (indirect_p)
5066 addr = build_va_arg_indirect_ref (addr);
5067 return build_va_arg_indirect_ref (addr);
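/* As a hedged sketch of the result, va_arg (ap, int) gimplifies into
   roughly the following (needed_intregs == 1, no temporary, no extra
   on-stack alignment since int only needs 32 bits):

	if (ap->gp_offset >= 48) goto lab_false;    // (6 - 1 + 1) * 8
	addr = ap->reg_save_area + ap->gp_offset;
	ap->gp_offset += 8;
	goto lab_over;
      lab_false:
	addr = ap->overflow_arg_area;
	ap->overflow_arg_area = addr + 8;
      lab_over:
	result = *(int *) addr;
*/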
5070 /* Return true if OPNUM's MEM should be matched
5071 in movabs* patterns. */
5073 bool
5074 ix86_check_movabs (rtx insn, int opnum)
5076 rtx set, mem;
5078 set = PATTERN (insn);
5079 if (GET_CODE (set) == PARALLEL)
5080 set = XVECEXP (set, 0, 0);
5081 gcc_assert (GET_CODE (set) == SET);
5082 mem = XEXP (set, opnum);
5083 while (SUBREG_P (mem))
5084 mem = SUBREG_REG (mem);
5085 gcc_assert (MEM_P (mem));
5086 return volatile_ok || !MEM_VOLATILE_P (mem);
5089 /* Return false if INSN contains a MEM with a non-default address space. */
5090 bool
5091 ix86_check_no_addr_space (rtx insn)
5093 subrtx_var_iterator::array_type array;
5094 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
5096 rtx x = *iter;
5097 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
5098 return false;
5100 return true;
5103 /* Initialize the table of extra 80387 mathematical constants. */
5105 static void
5106 init_ext_80387_constants (void)
5108 static const char * cst[5] =
5110 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5111 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5112 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5113 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5114 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5116 int i;
5118 for (i = 0; i < 5; i++)
5120 real_from_string (&ext_80387_constants_table[i], cst[i]);
5121 /* Ensure each constant is rounded to XFmode precision. */
5122 real_convert (&ext_80387_constants_table[i],
5123 XFmode, &ext_80387_constants_table[i]);
5126 ext_80387_constants_init = 1;
5129 /* Return non-zero if the constant is something that
5130 can be loaded with a special instruction. */
5133 standard_80387_constant_p (rtx x)
5135 machine_mode mode = GET_MODE (x);
5137 const REAL_VALUE_TYPE *r;
5139 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
5140 return -1;
5142 if (x == CONST0_RTX (mode))
5143 return 1;
5144 if (x == CONST1_RTX (mode))
5145 return 2;
5147 r = CONST_DOUBLE_REAL_VALUE (x);
5149 /* For XFmode constants, try to find a special 80387 instruction when
5150 optimizing for size or on those CPUs that benefit from them. */
5151 if (mode == XFmode
5152 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
5153 && !flag_rounding_math)
5155 int i;
5157 if (! ext_80387_constants_init)
5158 init_ext_80387_constants ();
5160 for (i = 0; i < 5; i++)
5161 if (real_identical (r, &ext_80387_constants_table[i]))
5162 return i + 3;
5165 /* A load of the constant -0.0 or -1.0 will be split into an
5166 fldz;fchs or fld1;fchs sequence. */
5167 if (real_isnegzero (r))
5168 return 8;
5169 if (real_identical (r, &dconstm1))
5170 return 9;
5172 return 0;
5175 /* Return the opcode of the special instruction to be used to load
5176 the constant X. */
5178 const char *
5179 standard_80387_constant_opcode (rtx x)
5181 switch (standard_80387_constant_p (x))
5183 case 1:
5184 return "fldz";
5185 case 2:
5186 return "fld1";
5187 case 3:
5188 return "fldlg2";
5189 case 4:
5190 return "fldln2";
5191 case 5:
5192 return "fldl2e";
5193 case 6:
5194 return "fldl2t";
5195 case 7:
5196 return "fldpi";
5197 case 8:
5198 case 9:
5199 return "#";
5200 default:
5201 gcc_unreachable ();
5205 /* Return the CONST_DOUBLE representing the 80387 constant that is
5206 loaded by the specified special instruction. The argument IDX
5207 matches the return value from standard_80387_constant_p. */
5210 standard_80387_constant_rtx (int idx)
5212 int i;
5214 if (! ext_80387_constants_init)
5215 init_ext_80387_constants ();
5217 switch (idx)
5219 case 3:
5220 case 4:
5221 case 5:
5222 case 6:
5223 case 7:
5224 i = idx - 3;
5225 break;
5227 default:
5228 gcc_unreachable ();
5231 return const_double_from_real_value (ext_80387_constants_table[i],
5232 XFmode);
5235 /* Return 1 if X is all bits 0, 2 if X is all bits 1,
5236 and 3 if X is all bits 1 with zero extension,
5237 in a supported SSE/AVX vector mode. */
5240 standard_sse_constant_p (rtx x, machine_mode pred_mode)
5242 machine_mode mode;
5244 if (!TARGET_SSE)
5245 return 0;
5247 mode = GET_MODE (x);
5249 if (x == const0_rtx || const0_operand (x, mode))
5250 return 1;
5252 if (x == constm1_rtx
5253 || vector_all_ones_operand (x, mode)
5254 || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5255 || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
5256 && float_vector_all_ones_operand (x, mode)))
5258 /* VOIDmode integer constant, get mode from the predicate. */
5259 if (mode == VOIDmode)
5260 mode = pred_mode;
5262 switch (GET_MODE_SIZE (mode))
5264 case 64:
5265 if (TARGET_AVX512F)
5266 return 2;
5267 break;
5268 case 32:
5269 if (TARGET_AVX2)
5270 return 2;
5271 break;
5272 case 16:
5273 if (TARGET_SSE2)
5274 return 2;
5275 break;
5276 case 0:
5277 /* VOIDmode */
5278 gcc_unreachable ();
5279 default:
5280 break;
5284 if (vector_all_ones_zero_extend_half_operand (x, mode)
5285 || vector_all_ones_zero_extend_quarter_operand (x, mode))
5286 return 3;
5288 return 0;
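/* Illustrative classification (hedged): an all-zero V4SImode constant
   yields 1 (cleared with an xor-style insn), an all-ones V8SImode
   constant yields 2 when AVX2 is enabled (pcmpeqd-style), and a 512-bit
   constant whose low 128 or 256 bits are all ones with the rest zero
   yields 3 (zero-extended compare), matching the opcodes selected in
   standard_sse_constant_opcode below.  */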
5291 /* Return the opcode of the special instruction to be used to load
5292 the constant operands[1] into operands[0]. */
5294 const char *
5295 standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
5297 machine_mode mode;
5298 rtx x = operands[1];
5300 gcc_assert (TARGET_SSE);
5302 mode = GET_MODE (x);
5304 if (x == const0_rtx || const0_operand (x, mode))
5306 switch (get_attr_mode (insn))
5308 case MODE_TI:
5309 if (!EXT_REX_SSE_REG_P (operands[0]))
5310 return "%vpxor\t%0, %d0";
5311 /* FALLTHRU */
5312 case MODE_XI:
5313 case MODE_OI:
5314 if (EXT_REX_SSE_REG_P (operands[0]))
5315 return (TARGET_AVX512VL
5316 ? "vpxord\t%x0, %x0, %x0"
5317 : "vpxord\t%g0, %g0, %g0");
5318 return "vpxor\t%x0, %x0, %x0";
5320 case MODE_V2DF:
5321 if (!EXT_REX_SSE_REG_P (operands[0]))
5322 return "%vxorpd\t%0, %d0";
5323 /* FALLTHRU */
5324 case MODE_V8DF:
5325 case MODE_V4DF:
5326 if (!EXT_REX_SSE_REG_P (operands[0]))
5327 return "vxorpd\t%x0, %x0, %x0";
5328 else if (TARGET_AVX512DQ)
5329 return (TARGET_AVX512VL
5330 ? "vxorpd\t%x0, %x0, %x0"
5331 : "vxorpd\t%g0, %g0, %g0");
5332 else
5333 return (TARGET_AVX512VL
5334 ? "vpxorq\t%x0, %x0, %x0"
5335 : "vpxorq\t%g0, %g0, %g0");
5337 case MODE_V4SF:
5338 if (!EXT_REX_SSE_REG_P (operands[0]))
5339 return "%vxorps\t%0, %d0";
5340 /* FALLTHRU */
5341 case MODE_V16SF:
5342 case MODE_V8SF:
5343 if (!EXT_REX_SSE_REG_P (operands[0]))
5344 return "vxorps\t%x0, %x0, %x0";
5345 else if (TARGET_AVX512DQ)
5346 return (TARGET_AVX512VL
5347 ? "vxorps\t%x0, %x0, %x0"
5348 : "vxorps\t%g0, %g0, %g0");
5349 else
5350 return (TARGET_AVX512VL
5351 ? "vpxord\t%x0, %x0, %x0"
5352 : "vpxord\t%g0, %g0, %g0");
5354 default:
5355 gcc_unreachable ();
5358 else if (x == constm1_rtx
5359 || vector_all_ones_operand (x, mode)
5360 || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5361 && float_vector_all_ones_operand (x, mode)))
5363 enum attr_mode insn_mode = get_attr_mode (insn);
5365 switch (insn_mode)
5367 case MODE_XI:
5368 case MODE_V8DF:
5369 case MODE_V16SF:
5370 gcc_assert (TARGET_AVX512F);
5371 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5373 case MODE_OI:
5374 case MODE_V4DF:
5375 case MODE_V8SF:
5376 gcc_assert (TARGET_AVX2);
5377 /* FALLTHRU */
5378 case MODE_TI:
5379 case MODE_V2DF:
5380 case MODE_V4SF:
5381 gcc_assert (TARGET_SSE2);
5382 if (!EXT_REX_SSE_REG_P (operands[0]))
5383 return (TARGET_AVX
5384 ? "vpcmpeqd\t%0, %0, %0"
5385 : "pcmpeqd\t%0, %0");
5386 else if (TARGET_AVX512VL)
5387 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
5388 else
5389 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5391 default:
5392 gcc_unreachable ();
5395 else if (vector_all_ones_zero_extend_half_operand (x, mode))
5397 if (GET_MODE_SIZE (mode) == 64)
5399 gcc_assert (TARGET_AVX512F);
5400 return "vpcmpeqd\t%t0, %t0, %t0";
5402 else if (GET_MODE_SIZE (mode) == 32)
5404 gcc_assert (TARGET_AVX);
5405 return "vpcmpeqd\t%x0, %x0, %x0";
5407 gcc_unreachable ();
5409 else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
5411 gcc_assert (TARGET_AVX512F);
5412 return "vpcmpeqd\t%x0, %x0, %x0";
5415 gcc_unreachable ();
5418 /* Returns true if INSN can be transformed from a memory load
5419 to a supported FP constant load. */
5421 bool
5422 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
5424 rtx src = find_constant_src (insn);
5426 gcc_assert (REG_P (dst));
5428 if (src == NULL
5429 || (SSE_REGNO_P (REGNO (dst))
5430 && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
5431 || (STACK_REGNO_P (REGNO (dst))
5432 && standard_80387_constant_p (src) < 1))
5433 return false;
5435 return true;
5438 /* Predicate for pre-reload splitters with associated instructions,
5439 which can match any time before the split1 pass (usually combine),
5440 then are unconditionally split in that pass and should not be
5441 matched again afterwards. */
5443 bool
5444 ix86_pre_reload_split (void)
5446 return (can_create_pseudo_p ()
5447 && !(cfun->curr_properties & PROP_rtl_split_insns));
5450 /* Return the opcode of the TYPE_SSEMOV instruction. To move from
5451 or to xmm16-xmm31/ymm16-ymm31 registers, we either require
5452 TARGET_AVX512VL or the move must be a register-to-register move,
5453 which can be done with a zmm register move. */
5455 static const char *
5456 ix86_get_ssemov (rtx *operands, unsigned size,
5457 enum attr_mode insn_mode, machine_mode mode)
5459 char buf[128];
5460 bool misaligned_p = (misaligned_operand (operands[0], mode)
5461 || misaligned_operand (operands[1], mode));
5462 bool evex_reg_p = (size == 64
5463 || EXT_REX_SSE_REG_P (operands[0])
5464 || EXT_REX_SSE_REG_P (operands[1]));
5465 machine_mode scalar_mode;
5467 const char *opcode = NULL;
5468 enum
5470 opcode_int,
5471 opcode_float,
5472 opcode_double
5473 } type = opcode_int;
5475 switch (insn_mode)
5477 case MODE_V16SF:
5478 case MODE_V8SF:
5479 case MODE_V4SF:
5480 scalar_mode = E_SFmode;
5481 type = opcode_float;
5482 break;
5483 case MODE_V8DF:
5484 case MODE_V4DF:
5485 case MODE_V2DF:
5486 scalar_mode = E_DFmode;
5487 type = opcode_double;
5488 break;
5489 case MODE_XI:
5490 case MODE_OI:
5491 case MODE_TI:
5492 scalar_mode = GET_MODE_INNER (mode);
5493 break;
5494 default:
5495 gcc_unreachable ();
5498 /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
5499 we can only use a zmm register move without a memory operand. */
5500 if (evex_reg_p
5501 && !TARGET_AVX512VL
5502 && GET_MODE_SIZE (mode) < 64)
5504 /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
5505 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
5506 AVX512VL is disabled, LRA can still generate reg to
5507 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
5508 modes. */
5509 if (memory_operand (operands[0], mode)
5510 || memory_operand (operands[1], mode))
5511 gcc_unreachable ();
5512 size = 64;
5513 switch (type)
5515 case opcode_int:
5516 if (scalar_mode == E_HFmode || scalar_mode == E_BFmode)
5517 opcode = (misaligned_p
5518 ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
5519 : "vmovdqa64");
5520 else
5521 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5522 break;
5523 case opcode_float:
5524 opcode = misaligned_p ? "vmovups" : "vmovaps";
5525 break;
5526 case opcode_double:
5527 opcode = misaligned_p ? "vmovupd" : "vmovapd";
5528 break;
5531 else if (SCALAR_FLOAT_MODE_P (scalar_mode))
5533 switch (scalar_mode)
5535 case E_HFmode:
5536 case E_BFmode:
5537 if (evex_reg_p)
5538 opcode = (misaligned_p
5539 ? (TARGET_AVX512BW
5540 ? "vmovdqu16"
5541 : "vmovdqu64")
5542 : "vmovdqa64");
5543 else
5544 opcode = (misaligned_p
5545 ? (TARGET_AVX512BW
5546 ? "vmovdqu16"
5547 : "%vmovdqu")
5548 : "%vmovdqa");
5549 break;
5550 case E_SFmode:
5551 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5552 break;
5553 case E_DFmode:
5554 opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
5555 break;
5556 case E_TFmode:
5557 if (evex_reg_p)
5558 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5559 else
5560 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5561 break;
5562 default:
5563 gcc_unreachable ();
5566 else if (SCALAR_INT_MODE_P (scalar_mode))
5568 switch (scalar_mode)
5570 case E_QImode:
5571 if (evex_reg_p)
5572 opcode = (misaligned_p
5573 ? (TARGET_AVX512BW
5574 ? "vmovdqu8"
5575 : "vmovdqu64")
5576 : "vmovdqa64");
5577 else
5578 opcode = (misaligned_p
5579 ? (TARGET_AVX512BW
5580 ? "vmovdqu8"
5581 : "%vmovdqu")
5582 : "%vmovdqa");
5583 break;
5584 case E_HImode:
5585 if (evex_reg_p)
5586 opcode = (misaligned_p
5587 ? (TARGET_AVX512BW
5588 ? "vmovdqu16"
5589 : "vmovdqu64")
5590 : "vmovdqa64");
5591 else
5592 opcode = (misaligned_p
5593 ? (TARGET_AVX512BW
5594 ? "vmovdqu16"
5595 : "%vmovdqu")
5596 : "%vmovdqa");
5597 break;
5598 case E_SImode:
5599 if (evex_reg_p)
5600 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5601 else
5602 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5603 break;
5604 case E_DImode:
5605 case E_TImode:
5606 case E_OImode:
5607 if (evex_reg_p)
5608 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5609 else
5610 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5611 break;
5612 case E_XImode:
5613 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5614 break;
5615 default:
5616 gcc_unreachable ();
5619 else
5620 gcc_unreachable ();
5622 switch (size)
5624 case 64:
5625 snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
5626 opcode);
5627 break;
5628 case 32:
5629 snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
5630 opcode);
5631 break;
5632 case 16:
5633 snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
5634 opcode);
5635 break;
5636 default:
5637 gcc_unreachable ();
5639 output_asm_insn (buf, operands);
5640 return "";
5643 /* Return the template of the TYPE_SSEMOV instruction to move
5644 operands[1] into operands[0]. */
5646 const char *
5647 ix86_output_ssemov (rtx_insn *insn, rtx *operands)
5649 machine_mode mode = GET_MODE (operands[0]);
5650 if (get_attr_type (insn) != TYPE_SSEMOV
5651 || mode != GET_MODE (operands[1]))
5652 gcc_unreachable ();
5654 enum attr_mode insn_mode = get_attr_mode (insn);
5656 switch (insn_mode)
5658 case MODE_XI:
5659 case MODE_V8DF:
5660 case MODE_V16SF:
5661 return ix86_get_ssemov (operands, 64, insn_mode, mode);
5663 case MODE_OI:
5664 case MODE_V4DF:
5665 case MODE_V8SF:
5666 return ix86_get_ssemov (operands, 32, insn_mode, mode);
5668 case MODE_TI:
5669 case MODE_V2DF:
5670 case MODE_V4SF:
5671 return ix86_get_ssemov (operands, 16, insn_mode, mode);
5673 case MODE_DI:
5674 /* Handle broken assemblers that require movd instead of movq. */
5675 if (GENERAL_REG_P (operands[0]))
5677 if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5678 return "%vmovq\t{%1, %q0|%q0, %1}";
5679 else
5680 return "%vmovd\t{%1, %q0|%q0, %1}";
5682 else if (GENERAL_REG_P (operands[1]))
5684 if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5685 return "%vmovq\t{%q1, %0|%0, %q1}";
5686 else
5687 return "%vmovd\t{%q1, %0|%0, %q1}";
5689 else
5690 return "%vmovq\t{%1, %0|%0, %1}";
5692 case MODE_SI:
5693 if (GENERAL_REG_P (operands[0]))
5694 return "%vmovd\t{%1, %k0|%k0, %1}";
5695 else if (GENERAL_REG_P (operands[1]))
5696 return "%vmovd\t{%k1, %0|%0, %k1}";
5697 else
5698 return "%vmovd\t{%1, %0|%0, %1}";
5700 case MODE_HI:
5701 if (GENERAL_REG_P (operands[0]))
5702 return "vmovw\t{%1, %k0|%k0, %1}";
5703 else if (GENERAL_REG_P (operands[1]))
5704 return "vmovw\t{%k1, %0|%0, %k1}";
5705 else
5706 return "vmovw\t{%1, %0|%0, %1}";
5708 case MODE_DF:
5709 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5710 return "vmovsd\t{%d1, %0|%0, %d1}";
5711 else
5712 return "%vmovsd\t{%1, %0|%0, %1}";
5714 case MODE_SF:
5715 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5716 return "vmovss\t{%d1, %0|%0, %d1}";
5717 else
5718 return "%vmovss\t{%1, %0|%0, %1}";
5720 case MODE_HF:
5721 case MODE_BF:
5722 if (REG_P (operands[0]) && REG_P (operands[1]))
5723 return "vmovsh\t{%d1, %0|%0, %d1}";
5724 else
5725 return "vmovsh\t{%1, %0|%0, %1}";
5727 case MODE_V1DF:
5728 gcc_assert (!TARGET_AVX);
5729 return "movlpd\t{%1, %0|%0, %1}";
5731 case MODE_V2SF:
5732 if (TARGET_AVX && REG_P (operands[0]))
5733 return "vmovlps\t{%1, %d0|%d0, %1}";
5734 else
5735 return "%vmovlps\t{%1, %0|%0, %1}";
5737 default:
5738 gcc_unreachable ();
5742 /* Return true if OP contains a symbol reference. */
5744 bool
5745 symbolic_reference_mentioned_p (rtx op)
5747 const char *fmt;
5748 int i;
5750 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5751 return true;
5753 fmt = GET_RTX_FORMAT (GET_CODE (op));
5754 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5756 if (fmt[i] == 'E')
5758 int j;
5760 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5761 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5762 return true;
5765 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5766 return true;
5769 return false;
5772 /* Return true if it is appropriate to emit `ret' instructions in the
5773 body of a function. Do this only if the epilogue is simple, needing a
5774 couple of insns. Prior to reloading, we can't tell how many registers
5775 must be saved, so return false then. Return false if there is no frame
5776 marker to de-allocate. */
5778 bool
5779 ix86_can_use_return_insn_p (void)
5781 if (ix86_function_ms_hook_prologue (current_function_decl))
5782 return false;
5784 if (ix86_function_naked (current_function_decl))
5785 return false;
5787 /* Don't use `ret' instruction in interrupt handler. */
5788 if (! reload_completed
5789 || frame_pointer_needed
5790 || cfun->machine->func_type != TYPE_NORMAL)
5791 return 0;
5793 /* Don't allow more than 32k pop, since that's all we can do
5794 with one instruction. */
5795 if (crtl->args.pops_args && crtl->args.size >= 32768)
5796 return 0;
5798 struct ix86_frame &frame = cfun->machine->frame;
5799 return (frame.stack_pointer_offset == UNITS_PER_WORD
5800 && (frame.nregs + frame.nsseregs) == 0);
5803 /* Return the stack frame size. get_frame_size () returns the stack slots
5804 used during compilation, which may be optimized out later. If a stack
5805 frame is needed, stack_frame_required should be true. */
5807 static HOST_WIDE_INT
5808 ix86_get_frame_size (void)
5810 if (cfun->machine->stack_frame_required)
5811 return get_frame_size ();
5812 else
5813 return 0;
5816 /* Value should be nonzero if functions must have frame pointers.
5817 Zero means the frame pointer need not be set up (and parms may
5818 be accessed via the stack pointer) in functions that seem suitable. */
5820 static bool
5821 ix86_frame_pointer_required (void)
5823 /* If we accessed previous frames, then the generated code expects
5824 to be able to access the saved ebp value in our frame. */
5825 if (cfun->machine->accesses_prev_frame)
5826 return true;
5828 /* Several x86 OSes need a frame pointer for other reasons,
5829 usually pertaining to setjmp. */
5830 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5831 return true;
5833 /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */
5834 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
5835 return true;
5837 /* With Win64 SEH, very large frames need a frame pointer, as the maximum
5838 stack allocation is 4GB. */
5839 if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
5840 return true;
5842 /* SSE saves require a frame pointer when the stack is misaligned. */
5843 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
5844 return true;
5846 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
5847 turns off the frame pointer by default. Turn it back on now if
5848 we've not got a leaf function. */
5849 if (TARGET_OMIT_LEAF_FRAME_POINTER
5850 && (!crtl->is_leaf
5851 || ix86_current_function_calls_tls_descriptor))
5852 return true;
5854 /* Several versions of mcount for the x86 assume that there is a
5855 frame, so we cannot allow profiling without a frame pointer. */
5856 if (crtl->profile && !flag_fentry)
5857 return true;
5859 return false;
5862 /* Record that the current function accesses previous call frames. */
5864 void
5865 ix86_setup_frame_addresses (void)
5867 cfun->machine->accesses_prev_frame = 1;
5870 #ifndef USE_HIDDEN_LINKONCE
5871 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
5872 # define USE_HIDDEN_LINKONCE 1
5873 # else
5874 # define USE_HIDDEN_LINKONCE 0
5875 # endif
5876 #endif
5878 /* Label count for call and return thunks. It is used to make unique
5879 labels in call and return thunks. */
5880 static int indirectlabelno;
5882 /* True if call thunk function is needed. */
5883 static bool indirect_thunk_needed = false;
5885 /* Bit mask of integer registers that contain the branch target, used
5886 by call thunk functions. */
5887 static HARD_REG_SET indirect_thunks_used;
5889 /* True if return thunk function is needed. */
5890 static bool indirect_return_needed = false;
5892 /* True if return thunk function via CX is needed. */
5893 static bool indirect_return_via_cx;
5895 #ifndef INDIRECT_LABEL
5896 # define INDIRECT_LABEL "LIND"
5897 #endif
5899 /* Indicate what prefix is needed for an indirect branch. */
5900 enum indirect_thunk_prefix
5902 indirect_thunk_prefix_none,
5903 indirect_thunk_prefix_nt
5906 /* Return the prefix needed for an indirect branch INSN. */
5908 enum indirect_thunk_prefix
5909 indirect_thunk_need_prefix (rtx_insn *insn)
5911 enum indirect_thunk_prefix need_prefix;
5912 if ((cfun->machine->indirect_branch_type
5913 == indirect_branch_thunk_extern)
5914 && ix86_notrack_prefixed_insn_p (insn))
5916 /* NOTRACK prefix is only used with external thunk so that it
5917 can be properly updated to support CET at run-time. */
5918 need_prefix = indirect_thunk_prefix_nt;
5920 else
5921 need_prefix = indirect_thunk_prefix_none;
5922 return need_prefix;
5925 /* Fills in the label name that should be used for the indirect thunk. */
5927 static void
5928 indirect_thunk_name (char name[32], unsigned int regno,
5929 enum indirect_thunk_prefix need_prefix,
5930 bool ret_p)
5932 if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
5933 gcc_unreachable ();
5935 if (USE_HIDDEN_LINKONCE)
5937 const char *prefix;
5939 if (need_prefix == indirect_thunk_prefix_nt
5940 && regno != INVALID_REGNUM)
5942 /* NOTRACK prefix is only used with external thunk via
5943 register so that NOTRACK prefix can be added to indirect
5944 branch via register to support CET at run-time. */
5945 prefix = "_nt";
5947 else
5948 prefix = "";
5950 const char *ret = ret_p ? "return" : "indirect";
5952 if (regno != INVALID_REGNUM)
5954 const char *reg_prefix;
5955 if (LEGACY_INT_REGNO_P (regno))
5956 reg_prefix = TARGET_64BIT ? "r" : "e";
5957 else
5958 reg_prefix = "";
5959 sprintf (name, "__x86_%s_thunk%s_%s%s",
5960 ret, prefix, reg_prefix, reg_names[regno]);
5962 else
5963 sprintf (name, "__x86_%s_thunk%s", ret, prefix);
5965 else
5967 if (regno != INVALID_REGNUM)
5968 ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
5969 else
5971 if (ret_p)
5972 ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
5973 else
5974 ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
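/* Example names generated above when USE_HIDDEN_LINKONCE (hedged, for
   orientation only):

	__x86_indirect_thunk          target on the stack
	__x86_indirect_thunk_rax      target in %rax
	__x86_indirect_thunk_nt_rax   NOTRACK variant via %rax
	__x86_return_thunk            used on the return path

   These are the retpoline-style symbols that -mindirect-branch=thunk-extern
   consumers (for instance the Linux kernel) are expected to provide.  */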
5979 /* Output a call and return thunk for indirect branch. If REGNO != -1,
5980 the function address is in REGNO and the call and return thunk looks like:
5982 call L2
5983 L1:
5984 pause
5985 lfence
5986 jmp L1
5987 L2:
5988 mov %REG, (%sp)
5989 ret
5991 Otherwise, the function address is on the top of the stack and the
5992 call and return thunk looks like:
5994 call L2
5995 L1:
5996 pause
5997 lfence
5998 jmp L1
5999 L2:
6000 lea WORD_SIZE(%sp), %sp
6001 ret
6002 */
6004 static void
6005 output_indirect_thunk (unsigned int regno)
6007 char indirectlabel1[32];
6008 char indirectlabel2[32];
6010 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
6011 indirectlabelno++);
6012 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
6013 indirectlabelno++);
6015 /* Call */
6016 fputs ("\tcall\t", asm_out_file);
6017 assemble_name_raw (asm_out_file, indirectlabel2);
6018 fputc ('\n', asm_out_file);
6020 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
6022 /* AMD and Intel CPUs each prefer a different instruction as a loop filler.
6023 Using both pause + lfence is a compromise solution. */
6024 fprintf (asm_out_file, "\tpause\n\tlfence\n");
6026 /* Jump. */
6027 fputs ("\tjmp\t", asm_out_file);
6028 assemble_name_raw (asm_out_file, indirectlabel1);
6029 fputc ('\n', asm_out_file);
6031 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
6033 /* The above call insn pushed a word to stack. Adjust CFI info. */
6034 if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
6036 if (! dwarf2out_do_cfi_asm ())
6038 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
6039 xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
6040 xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
6041 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
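/* The call above pushed a word on top of the incoming return address, so
   from indirectlabel2 onwards the CFA is 2 * UNITS_PER_WORD above the
   stack pointer. */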
6043 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
6044 xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
6045 xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
6046 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
6047 dwarf2out_emit_cfi (xcfi);
6050 if (regno != INVALID_REGNUM)
6052 /* MOV. */
6053 rtx xops[2];
6054 xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
6055 xops[1] = gen_rtx_REG (word_mode, regno);
6056 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
6058 else
6060 /* LEA. */
6061 rtx xops[2];
6062 xops[0] = stack_pointer_rtx;
6063 xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
6064 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
6067 fputs ("\tret\n", asm_out_file);
6068 if ((ix86_harden_sls & harden_sls_return))
6069 fputs ("\tint3\n", asm_out_file);
 6072 /* Output a function with a call and return thunk for indirect branch.
6073 If REGNO != INVALID_REGNUM, the function address is in REGNO.
6074 Otherwise, the function address is on the top of stack. Thunk is
6075 used for function return if RET_P is true. */
6077 static void
6078 output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
6079 unsigned int regno, bool ret_p)
6081 char name[32];
6082 tree decl;
6084 /* Create __x86_indirect_thunk. */
6085 indirect_thunk_name (name, regno, need_prefix, ret_p);
6086 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
6087 get_identifier (name),
6088 build_function_type_list (void_type_node, NULL_TREE));
6089 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
6090 NULL_TREE, void_type_node);
6091 TREE_PUBLIC (decl) = 1;
6092 TREE_STATIC (decl) = 1;
6093 DECL_IGNORED_P (decl) = 1;
6095 #if TARGET_MACHO
6096 if (TARGET_MACHO)
6098 switch_to_section (darwin_sections[picbase_thunk_section]);
6099 fputs ("\t.weak_definition\t", asm_out_file);
6100 assemble_name (asm_out_file, name);
6101 fputs ("\n\t.private_extern\t", asm_out_file);
6102 assemble_name (asm_out_file, name);
6103 putc ('\n', asm_out_file);
6104 ASM_OUTPUT_LABEL (asm_out_file, name);
6105 DECL_WEAK (decl) = 1;
6107 else
6108 #endif
6109 if (USE_HIDDEN_LINKONCE)
6111 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
6113 targetm.asm_out.unique_section (decl, 0);
6114 switch_to_section (get_named_section (decl, NULL, 0));
6116 targetm.asm_out.globalize_label (asm_out_file, name);
6117 fputs ("\t.hidden\t", asm_out_file);
6118 assemble_name (asm_out_file, name);
6119 putc ('\n', asm_out_file);
6120 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6122 else
6124 switch_to_section (text_section);
6125 ASM_OUTPUT_LABEL (asm_out_file, name);
6128 DECL_INITIAL (decl) = make_node (BLOCK);
6129 current_function_decl = decl;
6130 allocate_struct_function (decl, false);
6131 init_function_start (decl);
6132 /* We're about to hide the function body from callees of final_* by
6133 emitting it directly; tell them we're a thunk, if they care. */
6134 cfun->is_thunk = true;
6135 first_function_block_is_cold = false;
6136 /* Make sure unwind info is emitted for the thunk if needed. */
6137 final_start_function (emit_barrier (), asm_out_file, 1);
6139 output_indirect_thunk (regno);
6141 final_end_function ();
6142 init_insn_lengths ();
6143 free_after_compilation (cfun);
6144 set_cfun (NULL);
6145 current_function_decl = NULL;
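/* Bitmask of the integer registers for which a pc thunk label has been
   emitted; set in output_set_got and consumed in ix86_code_end. */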
6148 static int pic_labels_used;
6150 /* Fills in the label name that should be used for a pc thunk for
6151 the given register. */
6153 static void
6154 get_pc_thunk_name (char name[32], unsigned int regno)
6156 gcc_assert (!TARGET_64BIT);
6158 if (USE_HIDDEN_LINKONCE)
6159 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
6160 else
6161 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
 6165 /* This function outputs the thunks deferred to the end of compilation:
 6166 the indirect branch and return thunks, and the -fpic pc thunks that load
 a register with the return address of the caller and then return. */
6168 static void
6169 ix86_code_end (void)
6171 rtx xops[2];
6172 unsigned int regno;
6174 if (indirect_return_needed)
6175 output_indirect_thunk_function (indirect_thunk_prefix_none,
6176 INVALID_REGNUM, true);
6177 if (indirect_return_via_cx)
6178 output_indirect_thunk_function (indirect_thunk_prefix_none,
6179 CX_REG, true);
6180 if (indirect_thunk_needed)
6181 output_indirect_thunk_function (indirect_thunk_prefix_none,
6182 INVALID_REGNUM, false);
6184 for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
6186 if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
6187 output_indirect_thunk_function (indirect_thunk_prefix_none,
6188 regno, false);
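/* Output the remaining indirect branch thunks for the legacy integer
   registers, plus the pc thunks requested by output_set_got. */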
6191 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
6193 char name[32];
6194 tree decl;
6196 if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
6197 output_indirect_thunk_function (indirect_thunk_prefix_none,
6198 regno, false);
6200 if (!(pic_labels_used & (1 << regno)))
6201 continue;
6203 get_pc_thunk_name (name, regno);
6205 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
6206 get_identifier (name),
6207 build_function_type_list (void_type_node, NULL_TREE));
6208 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
6209 NULL_TREE, void_type_node);
6210 TREE_PUBLIC (decl) = 1;
6211 TREE_STATIC (decl) = 1;
6212 DECL_IGNORED_P (decl) = 1;
6214 #if TARGET_MACHO
6215 if (TARGET_MACHO)
6217 switch_to_section (darwin_sections[picbase_thunk_section]);
6218 fputs ("\t.weak_definition\t", asm_out_file);
6219 assemble_name (asm_out_file, name);
6220 fputs ("\n\t.private_extern\t", asm_out_file);
6221 assemble_name (asm_out_file, name);
6222 putc ('\n', asm_out_file);
6223 ASM_OUTPUT_LABEL (asm_out_file, name);
6224 DECL_WEAK (decl) = 1;
6226 else
6227 #endif
6228 if (USE_HIDDEN_LINKONCE)
6230 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
6232 targetm.asm_out.unique_section (decl, 0);
6233 switch_to_section (get_named_section (decl, NULL, 0));
6235 targetm.asm_out.globalize_label (asm_out_file, name);
6236 fputs ("\t.hidden\t", asm_out_file);
6237 assemble_name (asm_out_file, name);
6238 putc ('\n', asm_out_file);
6239 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6241 else
6243 switch_to_section (text_section);
6244 ASM_OUTPUT_LABEL (asm_out_file, name);
6247 DECL_INITIAL (decl) = make_node (BLOCK);
6248 current_function_decl = decl;
6249 allocate_struct_function (decl, false);
6250 init_function_start (decl);
6251 /* We're about to hide the function body from callees of final_* by
6252 emitting it directly; tell them we're a thunk, if they care. */
6253 cfun->is_thunk = true;
6254 first_function_block_is_cold = false;
6255 /* Make sure unwind info is emitted for the thunk if needed. */
6256 final_start_function (emit_barrier (), asm_out_file, 1);
6258 /* Pad stack IP move with 4 instructions (two NOPs count
6259 as one instruction). */
6260 if (TARGET_PAD_SHORT_FUNCTION)
6262 int i = 8;
6264 while (i--)
6265 fputs ("\tnop\n", asm_out_file);
6268 xops[0] = gen_rtx_REG (Pmode, regno);
6269 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
6270 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
6271 fputs ("\tret\n", asm_out_file);
6272 final_end_function ();
6273 init_insn_lengths ();
6274 free_after_compilation (cfun);
6275 set_cfun (NULL);
6276 current_function_decl = NULL;
6279 if (flag_split_stack)
6280 file_end_indicate_split_stack ();
6283 /* Emit code for the SET_GOT patterns. */
6285 const char *
6286 output_set_got (rtx dest, rtx label)
6288 rtx xops[3];
6290 xops[0] = dest;
6292 if (TARGET_VXWORKS_RTP && flag_pic)
6294 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
6295 xops[2] = gen_rtx_MEM (Pmode,
6296 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
6297 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6299 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6300 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6301 an unadorned address. */
6302 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6303 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
6304 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
6305 return "";
6308 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
6310 if (flag_pic)
6312 char name[32];
6313 get_pc_thunk_name (name, REGNO (dest));
6314 pic_labels_used |= 1 << REGNO (dest);
6316 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
6317 xops[2] = gen_rtx_MEM (QImode, xops[2]);
6318 output_asm_insn ("%!call\t%X2", xops);
6320 #if TARGET_MACHO
6321 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
6322 This is what will be referenced by the Mach-O PIC subsystem. */
6323 if (machopic_should_output_picbase_label () || !label)
6324 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
6326 /* When we are restoring the pic base at the site of a nonlocal label,
6327 and we decided to emit the pic base above, we will still output a
6328 local label used for calculating the correction offset (even though
6329 the offset will be 0 in that case). */
6330 if (label)
6331 targetm.asm_out.internal_label (asm_out_file, "L",
6332 CODE_LABEL_NUMBER (label));
6333 #endif
6335 else
6337 if (TARGET_MACHO)
6338 /* We don't need a pic base, we're not producing pic. */
6339 gcc_unreachable ();
6341 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
6342 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
6343 targetm.asm_out.internal_label (asm_out_file, "L",
6344 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
6347 if (!TARGET_MACHO)
6348 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
6350 return "";
 6353 /* Generate a "push" pattern for input ARG. */
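/* Note that gen_push also updates cfun->machine->fs: sp_offset (and
   cfa_offset when the CFA is the stack pointer) is advanced by
   UNITS_PER_WORD to account for the pushed word. */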
6356 gen_push (rtx arg)
6358 struct machine_function *m = cfun->machine;
6360 if (m->fs.cfa_reg == stack_pointer_rtx)
6361 m->fs.cfa_offset += UNITS_PER_WORD;
6362 m->fs.sp_offset += UNITS_PER_WORD;
6364 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6365 arg = gen_rtx_REG (word_mode, REGNO (arg));
6367 return gen_rtx_SET (gen_rtx_MEM (word_mode,
6368 gen_rtx_PRE_DEC (Pmode,
6369 stack_pointer_rtx)),
6370 arg);
 6373 /* Generate a "pop" pattern for input ARG. */
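/* Unlike gen_push, gen_pop does not touch cfun->machine->fs; callers are
   expected to update the frame state themselves. */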
6376 gen_pop (rtx arg)
6378 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6379 arg = gen_rtx_REG (word_mode, REGNO (arg));
6381 return gen_rtx_SET (arg,
6382 gen_rtx_MEM (word_mode,
6383 gen_rtx_POST_INC (Pmode,
6384 stack_pointer_rtx)));
 6387 /* Return the regno of an unused call-clobbered register if one is
 6388 available for the entire function, otherwise INVALID_REGNUM. */
6390 static unsigned int
6391 ix86_select_alt_pic_regnum (void)
6393 if (ix86_use_pseudo_pic_reg ())
6394 return INVALID_REGNUM;
6396 if (crtl->is_leaf
6397 && !crtl->profile
6398 && !ix86_current_function_calls_tls_descriptor)
6400 int i, drap;
6401 /* Can't use the same register for both PIC and DRAP. */
6402 if (crtl->drap_reg)
6403 drap = REGNO (crtl->drap_reg);
6404 else
6405 drap = -1;
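/* Hard registers 0, 1 and 2 are %eax, %edx and %ecx, the call-clobbered
   integer registers; try them starting from %ecx. */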
6406 for (i = 2; i >= 0; --i)
6407 if (i != drap && !df_regs_ever_live_p (i))
6408 return i;
6411 return INVALID_REGNUM;
6414 /* Return true if REGNO is used by the epilogue. */
6416 bool
6417 ix86_epilogue_uses (int regno)
6419 /* If there are no caller-saved registers, we preserve all registers,
6420 except for MMX and x87 registers which aren't supported when saving
6421 and restoring registers. Don't explicitly save SP register since
6422 it is always preserved. */
6423 return (epilogue_completed
6424 && cfun->machine->no_caller_saved_registers
6425 && !fixed_regs[regno]
6426 && !STACK_REGNO_P (regno)
6427 && !MMX_REGNO_P (regno));
6430 /* Return nonzero if register REGNO can be used as a scratch register
6431 in peephole2. */
6433 static bool
6434 ix86_hard_regno_scratch_ok (unsigned int regno)
 6436 /* If there are no caller-saved registers, we can't use any register
 6437 as a scratch register after the epilogue; we can use REGNO as a
 6438 scratch register only if it has been used before, to avoid saving
 6439 and restoring it. */
6440 return (!cfun->machine->no_caller_saved_registers
6441 || (!epilogue_completed
6442 && df_regs_ever_live_p (regno)));
6445 /* Return TRUE if we need to save REGNO. */
6447 bool
6448 ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
6450 /* If there are no caller-saved registers, we preserve all registers,
6451 except for MMX and x87 registers which aren't supported when saving
6452 and restoring registers. Don't explicitly save SP register since
6453 it is always preserved. */
6454 if (cfun->machine->no_caller_saved_registers)
6456 /* Don't preserve registers used for function return value. */
6457 rtx reg = crtl->return_rtx;
6458 if (reg)
6460 unsigned int i = REGNO (reg);
6461 unsigned int nregs = REG_NREGS (reg);
6462 while (nregs-- > 0)
6463 if ((i + nregs) == regno)
6464 return false;
6467 return (df_regs_ever_live_p (regno)
6468 && !fixed_regs[regno]
6469 && !STACK_REGNO_P (regno)
6470 && !MMX_REGNO_P (regno)
6471 && (regno != HARD_FRAME_POINTER_REGNUM
6472 || !frame_pointer_needed));
6475 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
6476 && pic_offset_table_rtx)
6478 if (ix86_use_pseudo_pic_reg ())
6480 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
6481 _mcount in prologue. */
6482 if (!TARGET_64BIT && flag_pic && crtl->profile)
6483 return true;
6485 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6486 || crtl->profile
6487 || crtl->calls_eh_return
6488 || crtl->uses_const_pool
6489 || cfun->has_nonlocal_label)
6490 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
6493 if (crtl->calls_eh_return && maybe_eh_return)
6495 unsigned i;
6496 for (i = 0; ; i++)
6498 unsigned test = EH_RETURN_DATA_REGNO (i);
6499 if (test == INVALID_REGNUM)
6500 break;
6501 if (test == regno)
6502 return true;
6506 if (ignore_outlined && cfun->machine->call_ms2sysv)
6508 unsigned count = cfun->machine->call_ms2sysv_extra_regs
6509 + xlogue_layout::MIN_REGS;
6510 if (xlogue_layout::is_stub_managed_reg (regno, count))
6511 return false;
6514 if (crtl->drap_reg
6515 && regno == REGNO (crtl->drap_reg)
6516 && !cfun->machine->no_drap_save_restore)
6517 return true;
6519 return (df_regs_ever_live_p (regno)
6520 && !call_used_or_fixed_reg_p (regno)
6521 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
 6524 /* Return number of saved general purpose registers. */
6526 static int
6527 ix86_nsaved_regs (void)
6529 int nregs = 0;
6530 int regno;
6532 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6533 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6534 nregs ++;
6535 return nregs;
6538 /* Return number of saved SSE registers. */
6540 static int
6541 ix86_nsaved_sseregs (void)
6543 int nregs = 0;
6544 int regno;
6546 if (!TARGET_64BIT_MS_ABI)
6547 return 0;
6548 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6549 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6550 nregs ++;
6551 return nregs;
6554 /* Given FROM and TO register numbers, say whether this elimination is
6555 allowed. If stack alignment is needed, we can only replace argument
6556 pointer with hard frame pointer, or replace frame pointer with stack
6557 pointer. Otherwise, frame pointer elimination is automatically
6558 handled and all other eliminations are valid. */
6560 static bool
6561 ix86_can_eliminate (const int from, const int to)
6563 if (stack_realign_fp)
6564 return ((from == ARG_POINTER_REGNUM
6565 && to == HARD_FRAME_POINTER_REGNUM)
6566 || (from == FRAME_POINTER_REGNUM
6567 && to == STACK_POINTER_REGNUM));
6568 else
6569 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
6572 /* Return the offset between two registers, one to be eliminated, and the other
6573 its replacement, at the start of a routine. */
6575 HOST_WIDE_INT
6576 ix86_initial_elimination_offset (int from, int to)
6578 struct ix86_frame &frame = cfun->machine->frame;
6580 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6581 return frame.hard_frame_pointer_offset;
6582 else if (from == FRAME_POINTER_REGNUM
6583 && to == HARD_FRAME_POINTER_REGNUM)
6584 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6585 else
6587 gcc_assert (to == STACK_POINTER_REGNUM);
6589 if (from == ARG_POINTER_REGNUM)
6590 return frame.stack_pointer_offset;
6592 gcc_assert (from == FRAME_POINTER_REGNUM);
6593 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6597 /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
6598 void
6599 warn_once_call_ms2sysv_xlogues (const char *feature)
6601 static bool warned_once = false;
6602 if (!warned_once)
6604 warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6605 feature);
6606 warned_once = true;
6610 /* Return the probing interval for -fstack-clash-protection. */
6612 static HOST_WIDE_INT
6613 get_probe_interval (void)
6615 if (flag_stack_clash_protection)
6616 return (HOST_WIDE_INT_1U
6617 << param_stack_clash_protection_probe_interval);
6618 else
6619 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
6622 /* When using -fsplit-stack, the allocation routines set a field in
6623 the TCB to the bottom of the stack plus this much space, measured
6624 in bytes. */
6626 #define SPLIT_STACK_AVAILABLE 256
 6628 /* Fill the ix86_frame structure with the frame layout of the currently compiled function. */
6630 static void
6631 ix86_compute_frame_layout (void)
6633 struct ix86_frame *frame = &cfun->machine->frame;
6634 struct machine_function *m = cfun->machine;
6635 unsigned HOST_WIDE_INT stack_alignment_needed;
6636 HOST_WIDE_INT offset;
6637 unsigned HOST_WIDE_INT preferred_alignment;
6638 HOST_WIDE_INT size = ix86_get_frame_size ();
6639 HOST_WIDE_INT to_allocate;
6641 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
6642 * ms_abi functions that call a sysv function. We now need to prune away
6643 * cases where it should be disabled. */
6644 if (TARGET_64BIT && m->call_ms2sysv)
6646 gcc_assert (TARGET_64BIT_MS_ABI);
6647 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
6648 gcc_assert (!TARGET_SEH);
6649 gcc_assert (TARGET_SSE);
6650 gcc_assert (!ix86_using_red_zone ());
6652 if (crtl->calls_eh_return)
6654 gcc_assert (!reload_completed);
6655 m->call_ms2sysv = false;
6656 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
6659 else if (ix86_static_chain_on_stack)
6661 gcc_assert (!reload_completed);
6662 m->call_ms2sysv = false;
6663 warn_once_call_ms2sysv_xlogues ("static call chains");
6666 /* Finally, compute which registers the stub will manage. */
6667 else
6669 unsigned count = xlogue_layout::count_stub_managed_regs ();
6670 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
6671 m->call_ms2sysv_pad_in = 0;
6675 frame->nregs = ix86_nsaved_regs ();
6676 frame->nsseregs = ix86_nsaved_sseregs ();
 6678 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
 6679 except for function prologues, leaf functions and when the default
 6680 incoming stack boundary is overridden at the command line or via the
 6681 force_align_arg_pointer attribute.
 6683 Darwin's ABI specifies 128-bit alignment for both 32- and 64-bit variants
 6684 at call sites, including profile function calls.
6686 if (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
6687 && crtl->preferred_stack_boundary < 128)
6688 && (!crtl->is_leaf || cfun->calls_alloca != 0
6689 || ix86_current_function_calls_tls_descriptor
6690 || (TARGET_MACHO && crtl->profile)
6691 || ix86_incoming_stack_boundary < 128))
6693 crtl->preferred_stack_boundary = 128;
6694 crtl->stack_alignment_needed = 128;
6697 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6698 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6700 gcc_assert (!size || stack_alignment_needed);
6701 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6702 gcc_assert (preferred_alignment <= stack_alignment_needed);
6704 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
6705 gcc_assert (TARGET_64BIT || !frame->nsseregs);
6706 if (TARGET_64BIT && m->call_ms2sysv)
6708 gcc_assert (stack_alignment_needed >= 16);
6709 gcc_assert (!frame->nsseregs);
6712 /* For SEH we have to limit the amount of code movement into the prologue.
6713 At present we do this via a BLOCKAGE, at which point there's very little
6714 scheduling that can be done, which means that there's very little point
6715 in doing anything except PUSHs. */
6716 if (TARGET_SEH)
6717 m->use_fast_prologue_epilogue = false;
6718 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
6720 int count = frame->nregs;
6721 struct cgraph_node *node = cgraph_node::get (current_function_decl);
6723 /* The fast prologue uses move instead of push to save registers. This
6724 is significantly longer, but also executes faster as modern hardware
6725 can execute the moves in parallel, but can't do that for push/pop.
6727 Be careful about choosing what prologue to emit: When function takes
6728 many instructions to execute we may use slow version as well as in
6729 case function is known to be outside hot spot (this is known with
6730 feedback only). Weight the size of function by number of registers
6731 to save as it is cheap to use one or two push instructions but very
6732 slow to use many of them.
6734 Calling this hook multiple times with the same frame requirements
6735 must produce the same layout, since the RA might otherwise be
6736 unable to reach a fixed point or might fail its final sanity checks.
6737 This means that once we've assumed that a function does or doesn't
6738 have a particular size, we have to stick to that assumption
6739 regardless of how the function has changed since. */
6740 if (count)
6741 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6742 if (node->frequency < NODE_FREQUENCY_NORMAL
6743 || (flag_branch_probabilities
6744 && node->frequency < NODE_FREQUENCY_HOT))
6745 m->use_fast_prologue_epilogue = false;
6746 else
6748 if (count != frame->expensive_count)
6750 frame->expensive_count = count;
6751 frame->expensive_p = expensive_function_p (count);
6753 m->use_fast_prologue_epilogue = !frame->expensive_p;
6757 frame->save_regs_using_mov
6758 = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
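/* Compute the frame offsets, working from the CFA downwards: return
   address (and error code for exception handlers), pushed static chain,
   saved frame pointer, GP register save area, SSE register save and
   va_arg areas, local variables and finally the outgoing argument area. */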
6760 /* Skip return address and error code in exception handler. */
6761 offset = INCOMING_FRAME_SP_OFFSET;
6763 /* Skip pushed static chain. */
6764 if (ix86_static_chain_on_stack)
6765 offset += UNITS_PER_WORD;
6767 /* Skip saved base pointer. */
6768 if (frame_pointer_needed)
6769 offset += UNITS_PER_WORD;
6770 frame->hfp_save_offset = offset;
6772 /* The traditional frame pointer location is at the top of the frame. */
6773 frame->hard_frame_pointer_offset = offset;
6775 /* Register save area */
6776 offset += frame->nregs * UNITS_PER_WORD;
6777 frame->reg_save_offset = offset;
6779 /* Calculate the size of the va-arg area (not including padding, if any). */
6780 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
6782 /* Also adjust stack_realign_offset for the largest alignment of
6783 stack slot actually used. */
6784 if (stack_realign_fp
6785 || (cfun->machine->max_used_stack_alignment != 0
6786 && (offset % cfun->machine->max_used_stack_alignment) != 0))
6788 /* We may need a 16-byte aligned stack for the remainder of the
6789 register save area, but the stack frame for the local function
6790 may require a greater alignment if using AVX/2/512. In order
6791 to avoid wasting space, we first calculate the space needed for
6792 the rest of the register saves, add that to the stack pointer,
6793 and then realign the stack to the boundary of the start of the
6794 frame for the local function. */
6795 HOST_WIDE_INT space_needed = 0;
6796 HOST_WIDE_INT sse_reg_space_needed = 0;
6798 if (TARGET_64BIT)
6800 if (m->call_ms2sysv)
6802 m->call_ms2sysv_pad_in = 0;
6803 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
6806 else if (frame->nsseregs)
6807 /* The only ABI that has saved SSE registers (Win64) also has a
6808 16-byte aligned default stack. However, many programs violate
6809 the ABI, and Wine64 forces stack realignment to compensate. */
6810 space_needed = frame->nsseregs * 16;
6812 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
 6814 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
 6815 round anyway to be pedantic. */
6816 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
6818 else
6819 space_needed = frame->va_arg_size;
6821 /* Record the allocation size required prior to the realignment AND. */
6822 frame->stack_realign_allocate = space_needed;
6824 /* The re-aligned stack starts at frame->stack_realign_offset. Values
6825 before this point are not directly comparable with values below
6826 this point. Use sp_valid_at to determine if the stack pointer is
6827 valid for a given offset, fp_valid_at for the frame pointer, or
6828 choose_baseaddr to have a base register chosen for you.
6830 Note that the result of (frame->stack_realign_offset
6831 & (stack_alignment_needed - 1)) may not equal zero. */
6832 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
6833 frame->stack_realign_offset = offset - space_needed;
6834 frame->sse_reg_save_offset = frame->stack_realign_offset
6835 + sse_reg_space_needed;
6837 else
6839 frame->stack_realign_offset = offset;
6841 if (TARGET_64BIT && m->call_ms2sysv)
6843 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
6844 offset += xlogue_layout::get_instance ().get_stack_space_used ();
6847 /* Align and set SSE register save area. */
6848 else if (frame->nsseregs)
6850 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
6851 required and the DRAP re-alignment boundary is at least 16 bytes,
6852 then we want the SSE register save area properly aligned. */
6853 if (ix86_incoming_stack_boundary >= 128
6854 || (stack_realign_drap && stack_alignment_needed >= 16))
6855 offset = ROUND_UP (offset, 16);
6856 offset += frame->nsseregs * 16;
6858 frame->sse_reg_save_offset = offset;
6859 offset += frame->va_arg_size;
 6862 /* Align the start of the frame for the local function. When a function
 6863 call is removed, the function may become a leaf function. But if
 6864 arguments may be passed on the stack, we need to align the stack when
 6865 there is no tail call. */
6866 if (m->call_ms2sysv
6867 || frame->va_arg_size != 0
6868 || size != 0
6869 || !crtl->is_leaf
6870 || (!crtl->tail_call_emit
6871 && cfun->machine->outgoing_args_on_stack)
6872 || cfun->calls_alloca
6873 || ix86_current_function_calls_tls_descriptor)
6874 offset = ROUND_UP (offset, stack_alignment_needed);
6876 /* Frame pointer points here. */
6877 frame->frame_pointer_offset = offset;
6879 offset += size;
6881 /* Add outgoing arguments area. Can be skipped if we eliminated
6882 all the function calls as dead code.
6883 Skipping is however impossible when function calls alloca. Alloca
6884 expander assumes that last crtl->outgoing_args_size
6885 of stack frame are unused. */
6886 if (ACCUMULATE_OUTGOING_ARGS
6887 && (!crtl->is_leaf || cfun->calls_alloca
6888 || ix86_current_function_calls_tls_descriptor))
6890 offset += crtl->outgoing_args_size;
6891 frame->outgoing_arguments_size = crtl->outgoing_args_size;
6893 else
6894 frame->outgoing_arguments_size = 0;
6896 /* Align stack boundary. Only needed if we're calling another function
6897 or using alloca. */
6898 if (!crtl->is_leaf || cfun->calls_alloca
6899 || ix86_current_function_calls_tls_descriptor)
6900 offset = ROUND_UP (offset, preferred_alignment);
6902 /* We've reached end of stack frame. */
6903 frame->stack_pointer_offset = offset;
6905 /* Size prologue needs to allocate. */
6906 to_allocate = offset - frame->sse_reg_save_offset;
6908 if ((!to_allocate && frame->nregs <= 1)
6909 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
6910 /* If static stack checking is enabled and done with probes,
6911 the registers need to be saved before allocating the frame. */
6912 || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6913 /* If stack clash probing needs a loop, then it needs a
6914 scratch register. But the returned register is only guaranteed
6915 to be safe to use after register saves are complete. So if
6916 stack clash protections are enabled and the allocated frame is
6917 larger than the probe interval, then use pushes to save
6918 callee saved registers. */
6919 || (flag_stack_clash_protection
6920 && !ix86_target_stack_probe ()
6921 && to_allocate > get_probe_interval ()))
6922 frame->save_regs_using_mov = false;
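/* A leaf function that never changes the stack pointer can keep up to
   RED_ZONE_SIZE - RED_ZONE_RESERVE bytes of its frame in the red zone
   below the stack pointer, avoiding an explicit allocation in the
   prologue. */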
6924 if (ix86_using_red_zone ()
6925 && crtl->sp_is_unchanging
6926 && crtl->is_leaf
6927 && !ix86_pc_thunk_call_expanded
6928 && !ix86_current_function_calls_tls_descriptor)
6930 frame->red_zone_size = to_allocate;
6931 if (frame->save_regs_using_mov)
6932 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6933 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6934 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6936 else
6937 frame->red_zone_size = 0;
6938 frame->stack_pointer_offset -= frame->red_zone_size;
6940 /* The SEH frame pointer location is near the bottom of the frame.
6941 This is enforced by the fact that the difference between the
6942 stack pointer and the frame pointer is limited to 240 bytes in
6943 the unwind data structure. */
6944 if (TARGET_SEH)
6946 /* Force the frame pointer to point at or below the lowest register save
6947 area, see the SEH code in config/i386/winnt.cc for the rationale. */
6948 frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;
6950 /* If we can leave the frame pointer where it is, do so; however return
6951 the establisher frame for __builtin_frame_address (0) or else if the
6952 frame overflows the SEH maximum frame size.
6954 Note that the value returned by __builtin_frame_address (0) is quite
6955 constrained, because setjmp is piggybacked on the SEH machinery with
6956 recent versions of MinGW:
6958 # elif defined(__SEH__)
6959 # if defined(__aarch64__) || defined(_ARM64_)
6960 # define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
6961 # elif (__MINGW_GCC_VERSION < 40702)
6962 # define setjmp(BUF) _setjmp((BUF), mingw_getsp())
6963 # else
6964 # define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
6965 # endif
6967 and the second argument passed to _setjmp, if not null, is forwarded
6968 to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
6969 built an ExceptionRecord on the fly describing the setjmp buffer). */
6970 const HOST_WIDE_INT diff
6971 = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
6972 if (diff <= 255 && !crtl->accesses_prior_frames)
6974 /* The resulting diff will be a multiple of 16 lower than 255,
6975 i.e. at most 240 as required by the unwind data structure. */
6976 frame->hard_frame_pointer_offset += (diff & 15);
6978 else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
6980 /* Ideally we'd determine what portion of the local stack frame
6981 (within the constraint of the lowest 240) is most heavily used.
6982 But without that complication, simply bias the frame pointer
6983 by 128 bytes so as to maximize the amount of the local stack
6984 frame that is addressable with 8-bit offsets. */
6985 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
6987 else
6988 frame->hard_frame_pointer_offset = frame->hfp_save_offset;
6992 /* This is semi-inlined memory_address_length, but simplified
6993 since we know that we're always dealing with reg+offset, and
6994 to avoid having to create and discard all that rtl. */
6996 static inline int
6997 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
6999 int len = 4;
7001 if (offset == 0)
7003 /* EBP and R13 cannot be encoded without an offset. */
7004 len = (regno == BP_REG || regno == R13_REG);
7006 else if (IN_RANGE (offset, -128, 127))
7007 len = 1;
7009 /* ESP and R12 must be encoded with a SIB byte. */
7010 if (regno == SP_REG || regno == R12_REG)
7011 len++;
7013 return len;
7016 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
7017 the frame save area. The register is saved at CFA - CFA_OFFSET. */
7019 static bool
7020 sp_valid_at (HOST_WIDE_INT cfa_offset)
7022 const struct machine_frame_state &fs = cfun->machine->fs;
7023 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
7025 /* Validate that the cfa_offset isn't in a "no-man's land". */
7026 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
7027 return false;
7029 return fs.sp_valid;
7032 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
7033 the frame save area. The register is saved at CFA - CFA_OFFSET. */
7035 static inline bool
7036 fp_valid_at (HOST_WIDE_INT cfa_offset)
7038 const struct machine_frame_state &fs = cfun->machine->fs;
7039 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
7041 /* Validate that the cfa_offset isn't in a "no-man's land". */
7042 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
7043 return false;
7045 return fs.fp_valid;
7048 /* Choose a base register based upon alignment requested, speed and/or
7049 size. */
7051 static void
7052 choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
7053 HOST_WIDE_INT &base_offset,
 7054 unsigned int align_requested, unsigned int *align)
7056 const struct machine_function *m = cfun->machine;
7057 unsigned int hfp_align;
7058 unsigned int drap_align;
7059 unsigned int sp_align;
7060 bool hfp_ok = fp_valid_at (cfa_offset);
7061 bool drap_ok = m->fs.drap_valid;
7062 bool sp_ok = sp_valid_at (cfa_offset);
7064 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
7066 /* Filter out any registers that don't meet the requested alignment
7067 criteria. */
 7068 if (align_requested)
7070 if (m->fs.realigned)
7071 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
 7072 /* SEH unwind code does not currently support REG_CFA_EXPRESSION
7073 notes (which we would need to use a realigned stack pointer),
7074 so disable on SEH targets. */
7075 else if (m->fs.sp_realigned)
7076 sp_align = crtl->stack_alignment_needed;
 7078 hfp_ok = hfp_ok && hfp_align >= align_requested;
 7079 drap_ok = drap_ok && drap_align >= align_requested;
 7080 sp_ok = sp_ok && sp_align >= align_requested;
7083 if (m->use_fast_prologue_epilogue)
7085 /* Choose the base register most likely to allow the most scheduling
7086 opportunities. Generally FP is valid throughout the function,
7087 while DRAP must be reloaded within the epilogue. But choose either
7088 over the SP due to increased encoding size. */
7090 if (hfp_ok)
7092 base_reg = hard_frame_pointer_rtx;
7093 base_offset = m->fs.fp_offset - cfa_offset;
7095 else if (drap_ok)
7097 base_reg = crtl->drap_reg;
7098 base_offset = 0 - cfa_offset;
7100 else if (sp_ok)
7102 base_reg = stack_pointer_rtx;
7103 base_offset = m->fs.sp_offset - cfa_offset;
7106 else
7108 HOST_WIDE_INT toffset;
7109 int len = 16, tlen;
7111 /* Choose the base register with the smallest address encoding.
7112 With a tie, choose FP > DRAP > SP. */
7113 if (sp_ok)
7115 base_reg = stack_pointer_rtx;
7116 base_offset = m->fs.sp_offset - cfa_offset;
7117 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
7119 if (drap_ok)
7121 toffset = 0 - cfa_offset;
7122 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
7123 if (tlen <= len)
7125 base_reg = crtl->drap_reg;
7126 base_offset = toffset;
7127 len = tlen;
7130 if (hfp_ok)
7132 toffset = m->fs.fp_offset - cfa_offset;
7133 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
7134 if (tlen <= len)
7136 base_reg = hard_frame_pointer_rtx;
7137 base_offset = toffset;
7142 /* Set the align return value. */
7143 if (align)
7145 if (base_reg == stack_pointer_rtx)
7146 *align = sp_align;
7147 else if (base_reg == crtl->drap_reg)
7148 *align = drap_align;
7149 else if (base_reg == hard_frame_pointer_rtx)
7150 *align = hfp_align;
7154 /* Return an RTX that points to CFA_OFFSET within the stack frame and
7155 the alignment of address. If ALIGN is non-null, it should point to
7156 an alignment value (in bits) that is preferred or zero and will
 7157 receive the alignment of the base register that was selected,
 7158 irrespective of whether or not CFA_OFFSET is a multiple of that
7159 alignment value. If it is possible for the base register offset to be
7160 non-immediate then SCRATCH_REGNO should specify a scratch register to
7161 use.
7163 The valid base registers are taken from CFUN->MACHINE->FS. */
7165 static rtx
7166 choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
7167 unsigned int scratch_regno = INVALID_REGNUM)
7169 rtx base_reg = NULL;
7170 HOST_WIDE_INT base_offset = 0;
7172 /* If a specific alignment is requested, try to get a base register
7173 with that alignment first. */
7174 if (align && *align)
7175 choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
7177 if (!base_reg)
7178 choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
7180 gcc_assert (base_reg != NULL);
7182 rtx base_offset_rtx = GEN_INT (base_offset);
7184 if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
7186 gcc_assert (scratch_regno != INVALID_REGNUM);
7188 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
7189 emit_move_insn (scratch_reg, base_offset_rtx);
7191 return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
7194 return plus_constant (Pmode, base_reg, base_offset);
7197 /* Emit code to save registers in the prologue. */
7199 static void
7200 ix86_emit_save_regs (void)
7202 unsigned int regno;
7203 rtx_insn *insn;
7205 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7206 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7208 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
7209 RTX_FRAME_RELATED_P (insn) = 1;
7213 /* Emit a single register save at CFA - CFA_OFFSET. */
7215 static void
7216 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
7217 HOST_WIDE_INT cfa_offset)
7219 struct machine_function *m = cfun->machine;
7220 rtx reg = gen_rtx_REG (mode, regno);
7221 rtx mem, addr, base, insn;
7222 unsigned int align = GET_MODE_ALIGNMENT (mode);
7224 addr = choose_baseaddr (cfa_offset, &align);
7225 mem = gen_frame_mem (mode, addr);
 7227 /* The location alignment depends upon the base register. */
7228 align = MIN (GET_MODE_ALIGNMENT (mode), align);
7229 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
7230 set_mem_align (mem, align);
7232 insn = emit_insn (gen_rtx_SET (mem, reg));
7233 RTX_FRAME_RELATED_P (insn) = 1;
7235 base = addr;
7236 if (GET_CODE (base) == PLUS)
7237 base = XEXP (base, 0);
7238 gcc_checking_assert (REG_P (base));
7240 /* When saving registers into a re-aligned local stack frame, avoid
7241 any tricky guessing by dwarf2out. */
7242 if (m->fs.realigned)
7244 gcc_checking_assert (stack_realign_drap);
7246 if (regno == REGNO (crtl->drap_reg))
7248 /* A bit of a hack. We force the DRAP register to be saved in
7249 the re-aligned stack frame, which provides us with a copy
7250 of the CFA that will last past the prologue. Install it. */
7251 gcc_checking_assert (cfun->machine->fs.fp_valid);
7252 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
7253 cfun->machine->fs.fp_offset - cfa_offset);
7254 mem = gen_rtx_MEM (mode, addr);
7255 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
7257 else
7259 /* The frame pointer is a stable reference within the
7260 aligned frame. Use it. */
7261 gcc_checking_assert (cfun->machine->fs.fp_valid);
7262 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
7263 cfun->machine->fs.fp_offset - cfa_offset);
7264 mem = gen_rtx_MEM (mode, addr);
7265 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7269 else if (base == stack_pointer_rtx && m->fs.sp_realigned
7270 && cfa_offset >= m->fs.sp_realigned_offset)
7272 gcc_checking_assert (stack_realign_fp);
7273 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7276 /* The memory may not be relative to the current CFA register,
7277 which means that we may need to generate a new pattern for
7278 use by the unwind info. */
7279 else if (base != m->fs.cfa_reg)
7281 addr = plus_constant (Pmode, m->fs.cfa_reg,
7282 m->fs.cfa_offset - cfa_offset);
7283 mem = gen_rtx_MEM (mode, addr);
7284 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
7288 /* Emit code to save registers using MOV insns.
7289 First register is stored at CFA - CFA_OFFSET. */
7290 static void
7291 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
7293 unsigned int regno;
7295 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7296 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7298 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
7299 cfa_offset -= UNITS_PER_WORD;
7303 /* Emit code to save SSE registers using MOV insns.
7304 First register is stored at CFA - CFA_OFFSET. */
7305 static void
7306 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
7308 unsigned int regno;
7310 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7311 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7313 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
7314 cfa_offset -= GET_MODE_SIZE (V4SFmode);
7318 static GTY(()) rtx queued_cfa_restores;
 7320 /* Add a REG_CFA_RESTORE REG note to INSN or queue it until the next stack
 7321 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
7322 Don't add the note if the previously saved value will be left untouched
7323 within stack red-zone till return, as unwinders can find the same value
7324 in the register and on the stack. */
7326 static void
7327 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
7329 if (!crtl->shrink_wrapped
7330 && cfa_offset <= cfun->machine->fs.red_zone_offset)
7331 return;
7333 if (insn)
7335 add_reg_note (insn, REG_CFA_RESTORE, reg);
7336 RTX_FRAME_RELATED_P (insn) = 1;
7338 else
7339 queued_cfa_restores
7340 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
7343 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
7345 static void
7346 ix86_add_queued_cfa_restore_notes (rtx insn)
7348 rtx last;
7349 if (!queued_cfa_restores)
7350 return;
7351 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
7353 XEXP (last, 1) = REG_NOTES (insn);
7354 REG_NOTES (insn) = queued_cfa_restores;
7355 queued_cfa_restores = NULL_RTX;
7356 RTX_FRAME_RELATED_P (insn) = 1;
7359 /* Expand prologue or epilogue stack adjustment.
 7360 The pattern exists to put a dependency on all ebp-based memory accesses.
7361 STYLE should be negative if instructions should be marked as frame related,
7362 zero if %r11 register is live and cannot be freely used and positive
7363 otherwise. */
7365 static rtx
7366 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
7367 int style, bool set_cfa)
7369 struct machine_function *m = cfun->machine;
7370 rtx addend = offset;
7371 rtx insn;
7372 bool add_frame_related_expr = false;
7374 if (!x86_64_immediate_operand (offset, Pmode))
7376 /* r11 is used by indirect sibcall return as well, set before the
7377 epilogue and used after the epilogue. */
7378 if (style)
7379 addend = gen_rtx_REG (Pmode, R11_REG);
7380 else
7382 gcc_assert (src != hard_frame_pointer_rtx
7383 && dest != hard_frame_pointer_rtx);
7384 addend = hard_frame_pointer_rtx;
7386 emit_insn (gen_rtx_SET (addend, offset));
7387 if (style < 0)
7388 add_frame_related_expr = true;
7391 insn = emit_insn (gen_pro_epilogue_adjust_stack_add
7392 (Pmode, dest, src, addend));
7393 if (style >= 0)
7394 ix86_add_queued_cfa_restore_notes (insn);
7396 if (set_cfa)
7398 rtx r;
7400 gcc_assert (m->fs.cfa_reg == src);
7401 m->fs.cfa_offset += INTVAL (offset);
7402 m->fs.cfa_reg = dest;
7404 r = gen_rtx_PLUS (Pmode, src, offset);
7405 r = gen_rtx_SET (dest, r);
7406 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
7407 RTX_FRAME_RELATED_P (insn) = 1;
7409 else if (style < 0)
7411 RTX_FRAME_RELATED_P (insn) = 1;
7412 if (add_frame_related_expr)
7414 rtx r = gen_rtx_PLUS (Pmode, src, offset);
7415 r = gen_rtx_SET (dest, r);
7416 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
7420 if (dest == stack_pointer_rtx)
7422 HOST_WIDE_INT ooffset = m->fs.sp_offset;
7423 bool valid = m->fs.sp_valid;
7424 bool realigned = m->fs.sp_realigned;
7426 if (src == hard_frame_pointer_rtx)
7428 valid = m->fs.fp_valid;
7429 realigned = false;
7430 ooffset = m->fs.fp_offset;
7432 else if (src == crtl->drap_reg)
7434 valid = m->fs.drap_valid;
7435 realigned = false;
7436 ooffset = 0;
7438 else
7440 /* Else there are two possibilities: SP itself, which we set
 7441 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
 7442 taken care of by hand along the eh_return path. */
7443 gcc_checking_assert (src == stack_pointer_rtx
7444 || offset == const0_rtx);
7447 m->fs.sp_offset = ooffset - INTVAL (offset);
7448 m->fs.sp_valid = valid;
7449 m->fs.sp_realigned = realigned;
7451 return insn;
 7454 /* Find an available register to be used as the dynamic realign argument
 7455 pointer register. Such a register will be written in the prologue and
 7456 used at the beginning of the body, so it must not be
 7457 1. a parameter passing register.
 7458 2. the GOT pointer.
 7459 We reuse the static-chain register if it is available. Otherwise, we
 7460 use DI for i386 and R13 for x86-64. We chose R13 since it has a
 7461 shorter encoding.
 7463 Return: the regno of the chosen register. */
7465 static unsigned int
7466 find_drap_reg (void)
7468 tree decl = cfun->decl;
7470 /* Always use callee-saved register if there are no caller-saved
7471 registers. */
7472 if (TARGET_64BIT)
 7474 /* Use R13 for a nested function or a function that needs a static chain.
 7475 Since a function with a tail call may use any caller-saved
 7476 register in the epilogue, DRAP must not use a caller-saved
 7477 register in that case. */
7478 if (DECL_STATIC_CHAIN (decl)
7479 || cfun->machine->no_caller_saved_registers
7480 || crtl->tail_call_emit)
7481 return R13_REG;
7483 return R10_REG;
7485 else
 7487 /* Use DI for a nested function or a function that needs a static chain.
 7488 Since a function with a tail call may use any caller-saved
 7489 register in the epilogue, DRAP must not use a caller-saved
 7490 register in that case. */
7491 if (DECL_STATIC_CHAIN (decl)
7492 || cfun->machine->no_caller_saved_registers
7493 || crtl->tail_call_emit
7494 || crtl->calls_eh_return)
7495 return DI_REG;
7497 /* Reuse static chain register if it isn't used for parameter
7498 passing. */
7499 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
7501 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
7502 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
7503 return CX_REG;
7505 return DI_REG;
7509 /* Return minimum incoming stack alignment. */
7511 static unsigned int
7512 ix86_minimum_incoming_stack_boundary (bool sibcall)
7514 unsigned int incoming_stack_boundary;
7516 /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
7517 if (cfun->machine->func_type != TYPE_NORMAL)
7518 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
7519 /* Prefer the one specified at command line. */
7520 else if (ix86_user_incoming_stack_boundary)
7521 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
 7522 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
 7523 if -mstackrealign is used, this isn't a sibcall check and the
 7524 estimated stack alignment is 128 bits. */
7525 else if (!sibcall
7526 && ix86_force_align_arg_pointer
7527 && crtl->stack_alignment_estimated == 128)
7528 incoming_stack_boundary = MIN_STACK_BOUNDARY;
7529 else
7530 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
7532 /* Incoming stack alignment can be changed on individual functions
7533 via force_align_arg_pointer attribute. We use the smallest
7534 incoming stack boundary. */
7535 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
7536 && lookup_attribute ("force_align_arg_pointer",
7537 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7538 incoming_stack_boundary = MIN_STACK_BOUNDARY;
7540 /* The incoming stack frame has to be aligned at least at
7541 parm_stack_boundary. */
7542 if (incoming_stack_boundary < crtl->parm_stack_boundary)
7543 incoming_stack_boundary = crtl->parm_stack_boundary;
7545 /* Stack at entrance of main is aligned by runtime. We use the
7546 smallest incoming stack boundary. */
7547 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
7548 && DECL_NAME (current_function_decl)
7549 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7550 && DECL_FILE_SCOPE_P (current_function_decl))
7551 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
7553 return incoming_stack_boundary;
7556 /* Update incoming stack boundary and estimated stack alignment. */
7558 static void
7559 ix86_update_stack_boundary (void)
7561 ix86_incoming_stack_boundary
7562 = ix86_minimum_incoming_stack_boundary (false);
7564 /* x86_64 vararg needs 16byte stack alignment for register save area. */
7565 if (TARGET_64BIT
7566 && cfun->stdarg
7567 && crtl->stack_alignment_estimated < 128)
7568 crtl->stack_alignment_estimated = 128;
7570 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
7571 if (ix86_tls_descriptor_calls_expanded_in_cfun
7572 && crtl->preferred_stack_boundary < 128)
7573 crtl->preferred_stack_boundary = 128;
7576 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7577 needed or an rtx for DRAP otherwise. */
7579 static rtx
7580 ix86_get_drap_rtx (void)
 7582 /* We must use DRAP if there are outgoing arguments on the stack or
 7583 the stack pointer register is clobbered by an asm statement and
7584 ACCUMULATE_OUTGOING_ARGS is false. */
7585 if (ix86_force_drap
7586 || ((cfun->machine->outgoing_args_on_stack
7587 || crtl->sp_is_clobbered_by_asm)
7588 && !ACCUMULATE_OUTGOING_ARGS))
7589 crtl->need_drap = true;
7591 if (stack_realign_drap)
 7593 /* Assign DRAP to vDRAP and return vDRAP. */
7594 unsigned int regno = find_drap_reg ();
7595 rtx drap_vreg;
7596 rtx arg_ptr;
7597 rtx_insn *seq, *insn;
7599 arg_ptr = gen_rtx_REG (Pmode, regno);
7600 crtl->drap_reg = arg_ptr;
7602 start_sequence ();
7603 drap_vreg = copy_to_reg (arg_ptr);
7604 seq = get_insns ();
7605 end_sequence ();
7607 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
7608 if (!optimize)
7610 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
7611 RTX_FRAME_RELATED_P (insn) = 1;
7613 return drap_vreg;
7615 else
7616 return NULL;
7619 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7621 static rtx
7622 ix86_internal_arg_pointer (void)
7624 return virtual_incoming_args_rtx;
7627 struct scratch_reg {
7628 rtx reg;
7629 bool saved;
7632 /* Return a short-lived scratch register for use on function entry.
7633 In 32-bit mode, it is valid only after the registers are saved
7634 in the prologue. This register must be released by means of
7635 release_scratch_register_on_entry once it is dead. */
7637 static void
7638 get_scratch_register_on_entry (struct scratch_reg *sr)
7640 int regno;
7642 sr->saved = false;
7644 if (TARGET_64BIT)
7646 /* We always use R11 in 64-bit mode. */
7647 regno = R11_REG;
7649 else
7651 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
7652 bool fastcall_p
7653 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7654 bool thiscall_p
7655 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7656 bool static_chain_p = DECL_STATIC_CHAIN (decl);
7657 int regparm = ix86_function_regparm (fntype, decl);
7658 int drap_regno
7659 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
7661 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
7662 for the static chain register. */
7663 if ((regparm < 1 || (fastcall_p && !static_chain_p))
7664 && drap_regno != AX_REG)
7665 regno = AX_REG;
7666 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
7667 for the static chain register. */
7668 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
7669 regno = AX_REG;
7670 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
7671 regno = DX_REG;
7672 /* ecx is the static chain register. */
7673 else if (regparm < 3 && !fastcall_p && !thiscall_p
7674 && !static_chain_p
7675 && drap_regno != CX_REG)
7676 regno = CX_REG;
7677 else if (ix86_save_reg (BX_REG, true, false))
7678 regno = BX_REG;
7679 /* esi is the static chain register. */
7680 else if (!(regparm == 3 && static_chain_p)
7681 && ix86_save_reg (SI_REG, true, false))
7682 regno = SI_REG;
7683 else if (ix86_save_reg (DI_REG, true, false))
7684 regno = DI_REG;
7685 else
7687 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
7688 sr->saved = true;
7692 sr->reg = gen_rtx_REG (Pmode, regno);
7693 if (sr->saved)
7695 rtx_insn *insn = emit_insn (gen_push (sr->reg));
7696 RTX_FRAME_RELATED_P (insn) = 1;
7700 /* Release a scratch register obtained from the preceding function.
7702 If RELEASE_VIA_POP is true, we just pop the register off the stack
7703 to release it. This is what non-Linux systems use with -fstack-check.
7705 Otherwise we use OFFSET to locate the saved register and the
7706 allocated stack space becomes part of the local frame and is
7707 deallocated by the epilogue. */
7709 static void
7710 release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
7711 bool release_via_pop)
7713 if (sr->saved)
7715 if (release_via_pop)
7717 struct machine_function *m = cfun->machine;
7718 rtx x, insn = emit_insn (gen_pop (sr->reg));
 7720 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
7721 RTX_FRAME_RELATED_P (insn) = 1;
7722 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
7723 x = gen_rtx_SET (stack_pointer_rtx, x);
7724 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
7725 m->fs.sp_offset -= UNITS_PER_WORD;
7727 else
7729 rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
7730 x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
7731 emit_insn (x);
7736 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
7738 If INT_REGISTERS_SAVED is true, then integer registers have already been
7739 pushed on the stack.
 7741 If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope
7742 beyond SIZE bytes.
7744 This assumes no knowledge of the current probing state, i.e. it is never
7745 allowed to allocate more than PROBE_INTERVAL bytes of stack space without
7746 a suitable probe. */
7748 static void
7749 ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
7750 const bool int_registers_saved,
7751 const bool protection_area)
7753 struct machine_function *m = cfun->machine;
7755 /* If this function does not statically allocate stack space, then
7756 no probes are needed. */
7757 if (!size)
7759 /* However, the allocation of space via pushes for register
7760 saves could be viewed as allocating space, but without the
7761 need to probe. */
7762 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
7763 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7764 else
7765 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
7766 return;
7769 /* If we are a noreturn function, then we have to consider the
7770 possibility that we're called via a jump rather than a call.
7772 Thus we don't have the implicit probe generated by saving the
 7773 return address into the stack at the call. As a result, the stack
7774 pointer could be anywhere in the guard page. The safe thing
7775 to do is emit a probe now.
7777 The probe can be avoided if we have already emitted any callee
7778 register saves into the stack or have a frame pointer (which will
7779 have been saved as well). Those saves will function as implicit
7780 probes.
7782 ?!? This should be revamped to work like aarch64 and s390 where
7783 we track the offset from the most recent probe. Normally that
7784 offset would be zero. For a noreturn function we would reset
7785 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
7786 we just probe when we cross PROBE_INTERVAL. */
7787 if (TREE_THIS_VOLATILE (cfun->decl)
7788 && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
7790 /* We can safely use any register here since we're just going to push
7791 its value and immediately pop it back. But we do try and avoid
7792 argument passing registers so as not to introduce dependencies in
7793 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
7794 rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
7795 rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
7796 rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
7797 m->fs.sp_offset -= UNITS_PER_WORD;
7798 if (m->fs.cfa_reg == stack_pointer_rtx)
7800 m->fs.cfa_offset -= UNITS_PER_WORD;
7801 rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
7802 x = gen_rtx_SET (stack_pointer_rtx, x);
7803 add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
7804 RTX_FRAME_RELATED_P (insn_push) = 1;
7805 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
7806 x = gen_rtx_SET (stack_pointer_rtx, x);
7807 add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
7808 RTX_FRAME_RELATED_P (insn_pop) = 1;
7810 emit_insn (gen_blockage ());
7813 const HOST_WIDE_INT probe_interval = get_probe_interval ();
7814 const int dope = 4 * UNITS_PER_WORD;
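  /* Illustrative note: with the default 4 KiB probe interval, the protection
     area below covers PROBE_INTERVAL plus this small dope of four words,
     i.e. 4096 + 32 bytes on 64-bit targets (UNITS_PER_WORD == 8) and
     4096 + 16 bytes on 32-bit targets (UNITS_PER_WORD == 4). */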
7816 /* If there is protection area, take it into account in the size. */
7817 if (protection_area)
7818 size += probe_interval + dope;
7820 /* If we allocate less than the size of the guard statically,
7821 then no probing is necessary, but we do need to allocate
7822 the stack. */
7823 else if (size < (1 << param_stack_clash_protection_guard_size))
7825 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7826 GEN_INT (-size), -1,
7827 m->fs.cfa_reg == stack_pointer_rtx);
7828 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7829 return;
7832 /* We're allocating a large enough stack frame that we need to
7833 emit probes. Either emit them inline or in a loop depending
7834 on the size. */
7835 if (size <= 4 * probe_interval)
7837 HOST_WIDE_INT i;
7838 for (i = probe_interval; i <= size; i += probe_interval)
7840 /* Allocate PROBE_INTERVAL bytes. */
7841 rtx insn
7842 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7843 GEN_INT (-probe_interval), -1,
7844 m->fs.cfa_reg == stack_pointer_rtx);
7845 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
7847 /* And probe at *sp. */
7848 emit_stack_probe (stack_pointer_rtx);
7849 emit_insn (gen_blockage ());
7852 /* We need to allocate space for the residual, but we do not need
7853 to probe the residual... */
7854 HOST_WIDE_INT residual = (i - probe_interval - size);
7855 if (residual)
7857 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7858 GEN_INT (residual), -1,
7859 m->fs.cfa_reg == stack_pointer_rtx);
7861 /* ...except if there is a protection area to maintain. */
7862 if (protection_area)
7863 emit_stack_probe (stack_pointer_rtx);
7866 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
7868 else
7870 /* We expect the GP registers to be saved when probes are used
7871 as the probing sequences might need a scratch register and
7872 the routine to allocate one assumes the integer registers
7873 have already been saved. */
7874 gcc_assert (int_registers_saved);
7876 struct scratch_reg sr;
7877 get_scratch_register_on_entry (&sr);
7879 /* If we needed to save a register, then account for any space
7880 that was pushed (we are not going to pop the register when
7881 we do the restore). */
7882 if (sr.saved)
7883 size -= UNITS_PER_WORD;
7885 /* Step 1: round SIZE down to a multiple of the interval. */
7886 HOST_WIDE_INT rounded_size = size & -probe_interval;
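  /* probe_interval is a power of two, so ANDing with its negation is the
     same as ROUND_DOWN (size, probe_interval). For example, with a
     4096-byte interval and SIZE == 10000, rounded_size is 8192 and the
     remaining 1808 bytes are handled by step 4 below. */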
7888 /* Step 2: compute final value of the loop counter. Use lea if
7889 possible. */
7890 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
7891 rtx insn;
7892 if (address_no_seg_operand (addr, Pmode))
7893 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
7894 else
7896 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
7897 insn = emit_insn (gen_rtx_SET (sr.reg,
7898 gen_rtx_PLUS (Pmode, sr.reg,
7899 stack_pointer_rtx)));
7901 if (m->fs.cfa_reg == stack_pointer_rtx)
7903 add_reg_note (insn, REG_CFA_DEF_CFA,
7904 plus_constant (Pmode, sr.reg,
7905 m->fs.cfa_offset + rounded_size));
7906 RTX_FRAME_RELATED_P (insn) = 1;
7909 /* Step 3: the loop. */
7910 rtx size_rtx = GEN_INT (rounded_size);
7911 insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
7912 size_rtx));
7913 if (m->fs.cfa_reg == stack_pointer_rtx)
7915 m->fs.cfa_offset += rounded_size;
7916 add_reg_note (insn, REG_CFA_DEF_CFA,
7917 plus_constant (Pmode, stack_pointer_rtx,
7918 m->fs.cfa_offset));
7919 RTX_FRAME_RELATED_P (insn) = 1;
7921 m->fs.sp_offset += rounded_size;
7922 emit_insn (gen_blockage ());
7924 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
7925 is equal to ROUNDED_SIZE. */
7927 if (size != rounded_size)
7929 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7930 GEN_INT (rounded_size - size), -1,
7931 m->fs.cfa_reg == stack_pointer_rtx);
7933 if (protection_area)
7934 emit_stack_probe (stack_pointer_rtx);
7937 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
7939 /* This does not deallocate the space reserved for the scratch
7940 register. That will be deallocated in the epilogue. */
7941 release_scratch_register_on_entry (&sr, size, false);
7944 /* Adjust back to account for the protection area. */
7945 if (protection_area)
7946 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7947 GEN_INT (probe_interval + dope), -1,
7948 m->fs.cfa_reg == stack_pointer_rtx);
7950 /* Make sure nothing is scheduled before we are done. */
7951 emit_insn (gen_blockage ());
7954 /* Adjust the stack pointer up to REG while probing it. */
7956 const char *
7957 output_adjust_stack_and_probe (rtx reg)
7959 static int labelno = 0;
7960 char loop_lab[32];
7961 rtx xops[2];
7963 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7965 /* Loop. */
7966 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
7968 /* SP = SP - PROBE_INTERVAL. */
7969 xops[0] = stack_pointer_rtx;
7970 xops[1] = GEN_INT (get_probe_interval ());
7971 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
7973 /* Probe at SP. */
7974 xops[1] = const0_rtx;
7975 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
7977 /* Test if SP == LAST_ADDR. */
7978 xops[0] = stack_pointer_rtx;
7979 xops[1] = reg;
7980 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
7982 /* Branch. */
7983 fputs ("\tjne\t", asm_out_file);
7984 assemble_name_raw (asm_out_file, loop_lab);
7985 fputc ('\n', asm_out_file);
7987 return "";
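/* As a rough sketch (assuming a 64-bit target, the default 4 KiB probe
   interval, and REG hypothetically allocated to %r11), the loop printed
   above comes out as:

	.LPSRL0:
		subq	$4096, %rsp
		orq	$0, (%rsp)
		cmpq	%r11, %rsp
		jne	.LPSRL0  */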
7990 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
7991 inclusive. These are offsets from the current stack pointer.
7993 INT_REGISTERS_SAVED is true if integer registers have already been
7994 pushed on the stack. */
7996 static void
7997 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
7998 const bool int_registers_saved)
8000 const HOST_WIDE_INT probe_interval = get_probe_interval ();
8002 /* See if we have a constant small number of probes to generate. If so,
8003 that's the easy case. The run-time loop is made up of 6 insns in the
8004 generic case while the compile-time loop is made up of n insns for n #
8005 of intervals. */
8006 if (size <= 6 * probe_interval)
8008 HOST_WIDE_INT i;
8010 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
8011 it exceeds SIZE. If only one probe is needed, this will not
8012 generate any code. Then probe at FIRST + SIZE. */
8013 for (i = probe_interval; i < size; i += probe_interval)
8014 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
8015 -(first + i)));
8017 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
8018 -(first + size)));
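  /* For example, with FIRST == 0, SIZE == 12288 and a 4096-byte interval,
     this inline path probes at sp-4096, sp-8192 and sp-12288. */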
8021 /* Otherwise, do the same as above, but in a loop. Note that we must be
8022 extra careful with variables wrapping around because we might be at
8023 the very top (or the very bottom) of the address space and we have
8024 to be able to handle this case properly; in particular, we use an
8025 equality test for the loop condition. */
8026 else
8028 /* We expect the GP registers to be saved when probes are used
8029 as the probing sequences might need a scratch register and
8030 the routine to allocate one assumes the integer registers
8031 have already been saved. */
8032 gcc_assert (int_registers_saved);
8034 HOST_WIDE_INT rounded_size, last;
8035 struct scratch_reg sr;
8037 get_scratch_register_on_entry (&sr);
8040 /* Step 1: round SIZE to the previous multiple of the interval. */
8042 rounded_size = ROUND_DOWN (size, probe_interval);
8045 /* Step 2: compute initial and final value of the loop counter. */
8047 /* TEST_OFFSET = FIRST. */
8048 emit_move_insn (sr.reg, GEN_INT (-first));
8050 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
8051 last = first + rounded_size;
8054 /* Step 3: the loop
8058 do { TEST_ADDR = TEST_ADDR + PROBE_INTERVAL;
8059 probe at TEST_ADDR; }
8061 while (TEST_ADDR != LAST_ADDR)
8063 This probes at FIRST + N * PROBE_INTERVAL for values of N from 1
8064 until it is equal to ROUNDED_SIZE. */
8066 emit_insn
8067 (gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));
8070 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
8071 that SIZE is equal to ROUNDED_SIZE. */
8073 if (size != rounded_size)
8074 emit_stack_probe (plus_constant (Pmode,
8075 gen_rtx_PLUS (Pmode,
8076 stack_pointer_rtx,
8077 sr.reg),
8078 rounded_size - size));
8080 release_scratch_register_on_entry (&sr, size, true);
8083 /* Make sure nothing is scheduled before we are done. */
8084 emit_insn (gen_blockage ());
8087 /* Probe a range of stack addresses from REG to END, inclusive. These are
8088 offsets from the current stack pointer. */
8090 const char *
8091 output_probe_stack_range (rtx reg, rtx end)
8093 static int labelno = 0;
8094 char loop_lab[32];
8095 rtx xops[3];
8097 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
8099 /* Loop. */
8100 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
8102 /* TEST_ADDR = TEST_ADDR - PROBE_INTERVAL. */
8103 xops[0] = reg;
8104 xops[1] = GEN_INT (get_probe_interval ());
8105 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
8107 /* Probe at TEST_ADDR. */
8108 xops[0] = stack_pointer_rtx;
8109 xops[1] = reg;
8110 xops[2] = const0_rtx;
8111 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
8113 /* Test if TEST_ADDR == LAST_ADDR. */
8114 xops[0] = reg;
8115 xops[1] = end;
8116 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
8118 /* Branch. */
8119 fputs ("\tjne\t", asm_out_file);
8120 assemble_name_raw (asm_out_file, loop_lab);
8121 fputc ('\n', asm_out_file);
8123 return "";
8126 /* Set stack_frame_required to false if a stack frame isn't required.
8127 Update STACK_ALIGNMENT to the largest alignment, in bits, of any stack
8128 slot used if a stack frame is required and CHECK_STACK_SLOT is true. */
8130 static void
8131 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
8132 bool check_stack_slot)
8134 HARD_REG_SET set_up_by_prologue, prologue_used;
8135 basic_block bb;
8137 CLEAR_HARD_REG_SET (prologue_used);
8138 CLEAR_HARD_REG_SET (set_up_by_prologue);
8139 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
8140 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
8141 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
8142 HARD_FRAME_POINTER_REGNUM);
8144 /* The preferred stack alignment is the minimum stack alignment. */
8145 if (stack_alignment > crtl->preferred_stack_boundary)
8146 stack_alignment = crtl->preferred_stack_boundary;
8148 bool require_stack_frame = false;
8150 FOR_EACH_BB_FN (bb, cfun)
8152 rtx_insn *insn;
8153 FOR_BB_INSNS (bb, insn)
8154 if (NONDEBUG_INSN_P (insn)
8155 && requires_stack_frame_p (insn, prologue_used,
8156 set_up_by_prologue))
8158 require_stack_frame = true;
8160 if (check_stack_slot)
8162 /* Find the maximum stack alignment. */
8163 subrtx_iterator::array_type array;
8164 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
8165 if (MEM_P (*iter)
8166 && (reg_mentioned_p (stack_pointer_rtx,
8167 *iter)
8168 || reg_mentioned_p (frame_pointer_rtx,
8169 *iter)))
8171 unsigned int alignment = MEM_ALIGN (*iter);
8172 if (alignment > stack_alignment)
8173 stack_alignment = alignment;
8179 cfun->machine->stack_frame_required = require_stack_frame;
8182 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
8183 will guide prologue/epilogue to be generated in correct form. */
8185 static void
8186 ix86_finalize_stack_frame_flags (void)
8188 /* Check if stack realignment is really needed after reload, and
8189 store the result in cfun. */
8190 unsigned int incoming_stack_boundary
8191 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8192 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8193 unsigned int stack_alignment
8194 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
8195 ? crtl->max_used_stack_slot_alignment
8196 : crtl->stack_alignment_needed);
8197 unsigned int stack_realign
8198 = (incoming_stack_boundary < stack_alignment);
8199 bool recompute_frame_layout_p = false;
8201 if (crtl->stack_realign_finalized)
8203 /* After stack_realign_needed is finalized, we can no longer
8204 change it. */
8205 gcc_assert (crtl->stack_realign_needed == stack_realign);
8206 return;
8209 /* It is always safe to compute max_used_stack_alignment. We
8210 compute it only if a 128-bit aligned load/store may be generated
8211 on a misaligned stack slot, which would lead to a segfault. */
8212 bool check_stack_slot
8213 = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
8214 ix86_find_max_used_stack_alignment (stack_alignment,
8215 check_stack_slot);
8217 /* If the only reason for frame_pointer_needed is that we conservatively
8218 assumed stack realignment might be needed or -fno-omit-frame-pointer
8219 is used, but in the end nothing that needed the stack alignment had
8220 been spilled and there was no stack access, clear frame_pointer_needed
8221 and say we don't need stack realignment.
8223 When a vector register is used for piecewise moves and stores, we don't
8224 increase stack_alignment_needed because there is no register spill for
8225 piecewise moves and stores. Since stack_realign_needed is set to true
8226 by checking stack_alignment_estimated, which is updated by pseudo
8227 vector register usage, we also need to check stack_realign_needed to
8228 eliminate the frame pointer. */
8229 if ((stack_realign
8230 || (!flag_omit_frame_pointer && optimize)
8231 || crtl->stack_realign_needed)
8232 && frame_pointer_needed
8233 && crtl->is_leaf
8234 && crtl->sp_is_unchanging
8235 && !ix86_current_function_calls_tls_descriptor
8236 && !crtl->accesses_prior_frames
8237 && !cfun->calls_alloca
8238 && !crtl->calls_eh_return
8239 /* See ira_setup_eliminable_regset for the rationale. */
8240 && !(STACK_CHECK_MOVING_SP
8241 && flag_stack_check
8242 && flag_exceptions
8243 && cfun->can_throw_non_call_exceptions)
8244 && !ix86_frame_pointer_required ()
8245 && ix86_get_frame_size () == 0
8246 && ix86_nsaved_sseregs () == 0
8247 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
8249 if (cfun->machine->stack_frame_required)
8251 /* Stack frame is required. If stack alignment needed is less
8252 than incoming stack boundary, don't realign stack. */
8253 stack_realign = incoming_stack_boundary < stack_alignment;
8254 if (!stack_realign)
8256 crtl->max_used_stack_slot_alignment
8257 = incoming_stack_boundary;
8258 crtl->stack_alignment_needed
8259 = incoming_stack_boundary;
8260 /* Also update preferred_stack_boundary for leaf
8261 functions. */
8262 crtl->preferred_stack_boundary
8263 = incoming_stack_boundary;
8266 else
8268 /* If drap has been set, but it actually isn't live at the
8269 start of the function, there is no reason to set it up. */
8270 if (crtl->drap_reg)
8272 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
8273 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
8274 REGNO (crtl->drap_reg)))
8276 crtl->drap_reg = NULL_RTX;
8277 crtl->need_drap = false;
8280 else
8281 cfun->machine->no_drap_save_restore = true;
8283 frame_pointer_needed = false;
8284 stack_realign = false;
8285 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
8286 crtl->stack_alignment_needed = incoming_stack_boundary;
8287 crtl->stack_alignment_estimated = incoming_stack_boundary;
8288 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
8289 crtl->preferred_stack_boundary = incoming_stack_boundary;
8290 df_finish_pass (true);
8291 df_scan_alloc (NULL);
8292 df_scan_blocks ();
8293 df_compute_regs_ever_live (true);
8294 df_analyze ();
8296 if (flag_var_tracking)
8298 /* Since frame pointer is no longer available, replace it with
8299 stack pointer - UNITS_PER_WORD in debug insns. */
8300 df_ref ref, next;
8301 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
8302 ref; ref = next)
8304 next = DF_REF_NEXT_REG (ref);
8305 if (!DF_REF_INSN_INFO (ref))
8306 continue;
8308 /* Make sure the next ref is for a different instruction,
8309 so that we're not affected by the rescan. */
8310 rtx_insn *insn = DF_REF_INSN (ref);
8311 while (next && DF_REF_INSN (next) == insn)
8312 next = DF_REF_NEXT_REG (next);
8314 if (DEBUG_INSN_P (insn))
8316 bool changed = false;
8317 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
8319 rtx *loc = DF_REF_LOC (ref);
8320 if (*loc == hard_frame_pointer_rtx)
8322 *loc = plus_constant (Pmode,
8323 stack_pointer_rtx,
8324 -UNITS_PER_WORD);
8325 changed = true;
8328 if (changed)
8329 df_insn_rescan (insn);
8334 recompute_frame_layout_p = true;
8337 else if (crtl->max_used_stack_slot_alignment >= 128
8338 && cfun->machine->stack_frame_required)
8340 /* We don't need to realign the stack. max_used_stack_alignment is
8341 used to decide how the stack frame should be aligned. This is
8342 independent of any psABI and of 32-bit vs 64-bit. */
8343 cfun->machine->max_used_stack_alignment
8344 = stack_alignment / BITS_PER_UNIT;
8347 if (crtl->stack_realign_needed != stack_realign)
8348 recompute_frame_layout_p = true;
8349 crtl->stack_realign_needed = stack_realign;
8350 crtl->stack_realign_finalized = true;
8351 if (recompute_frame_layout_p)
8352 ix86_compute_frame_layout ();
8355 /* Delete SET_GOT right after entry block if it is allocated to reg. */
8357 static void
8358 ix86_elim_entry_set_got (rtx reg)
8360 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
8361 rtx_insn *c_insn = BB_HEAD (bb);
8362 if (!NONDEBUG_INSN_P (c_insn))
8363 c_insn = next_nonnote_nondebug_insn (c_insn);
8364 if (c_insn && NONJUMP_INSN_P (c_insn))
8366 rtx pat = PATTERN (c_insn);
8367 if (GET_CODE (pat) == PARALLEL)
8369 rtx vec = XVECEXP (pat, 0, 0);
8370 if (GET_CODE (vec) == SET
8371 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
8372 && REGNO (XEXP (vec, 0)) == REGNO (reg))
8373 delete_insn (c_insn);
8378 static rtx
8379 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
8381 rtx addr, mem;
8383 if (offset)
8384 addr = plus_constant (Pmode, frame_reg, offset);
8385 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
8386 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
8389 static inline rtx
8390 gen_frame_load (rtx reg, rtx frame_reg, int offset)
8392 return gen_frame_set (reg, frame_reg, offset, false);
8395 static inline rtx
8396 gen_frame_store (rtx reg, rtx frame_reg, int offset)
8398 return gen_frame_set (reg, frame_reg, offset, true);
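/* For example (illustration only): for a word_mode register REG on x86-64,
   gen_frame_store (REG, rax, -16) builds
     (set (mem:DI (plus:DI (reg:DI ax) (const_int -16))) REG)
   and gen_frame_load simply swaps the SET's source and destination. */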
8401 static void
8402 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
8404 struct machine_function *m = cfun->machine;
8405 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
8406 + m->call_ms2sysv_extra_regs;
8407 rtvec v = rtvec_alloc (ncregs + 1);
8408 unsigned int align, i, vi = 0;
8409 rtx_insn *insn;
8410 rtx sym, addr;
8411 rtx rax = gen_rtx_REG (word_mode, AX_REG);
8412 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
8414 /* AL should only be live with sysv_abi. */
8415 gcc_assert (!ix86_eax_live_at_start_p ());
8416 gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
8418 /* Set up RAX as the stub's base pointer. We use stack_realign_offset
8419 regardless of whether we've actually realigned the stack or not. */
8420 align = GET_MODE_ALIGNMENT (V4SFmode);
8421 addr = choose_baseaddr (frame.stack_realign_offset
8422 + xlogue.get_stub_ptr_offset (), &align, AX_REG);
8423 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
8425 emit_insn (gen_rtx_SET (rax, addr));
8427 /* Get the stub symbol. */
8428 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
8429 : XLOGUE_STUB_SAVE);
8430 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8432 for (i = 0; i < ncregs; ++i)
8434 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
8435 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
8436 r.regno);
8437 RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
8440 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
8442 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
8443 RTX_FRAME_RELATED_P (insn) = true;
8446 /* Generate and return an insn body to AND X with Y. */
8448 static rtx_insn *
8449 gen_and2_insn (rtx x, rtx y)
8451 enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
8453 gcc_assert (insn_operand_matches (icode, 0, x));
8454 gcc_assert (insn_operand_matches (icode, 1, x));
8455 gcc_assert (insn_operand_matches (icode, 2, y));
8457 return GEN_FCN (icode) (x, x, y);
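/* This is used below to realign the stack: for instance,
   gen_and2_insn (stack_pointer_rtx, GEN_INT (-32)) emits an AND of the
   stack pointer with ...fffe0, clearing its low five bits and rounding it
   down to a 32-byte boundary (the stack grows downwards, so rounding down
   is the safe direction). */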
8460 /* Expand the prologue into a bunch of separate insns. */
8462 void
8463 ix86_expand_prologue (void)
8465 struct machine_function *m = cfun->machine;
8466 rtx insn, t;
8467 HOST_WIDE_INT allocate;
8468 bool int_registers_saved;
8469 bool sse_registers_saved;
8470 bool save_stub_call_needed;
8471 rtx static_chain = NULL_RTX;
8473 ix86_last_zero_store_uid = 0;
8474 if (ix86_function_naked (current_function_decl))
8476 if (flag_stack_usage_info)
8477 current_function_static_stack_size = 0;
8478 return;
8481 ix86_finalize_stack_frame_flags ();
8483 /* DRAP should not coexist with stack_realign_fp */
8484 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8486 memset (&m->fs, 0, sizeof (m->fs));
8488 /* Initialize CFA state for before the prologue. */
8489 m->fs.cfa_reg = stack_pointer_rtx;
8490 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
8492 /* Track SP offset to the CFA. We continue tracking this after we've
8493 swapped the CFA register away from SP. In the case of re-alignment
8494 this is fudged; we're interested in offsets within the local frame. */
8495 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8496 m->fs.sp_valid = true;
8497 m->fs.sp_realigned = false;
8499 const struct ix86_frame &frame = cfun->machine->frame;
8501 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
8503 /* We should have already generated an error for any use of
8504 ms_hook on a nested function. */
8505 gcc_checking_assert (!ix86_static_chain_on_stack);
8507 /* Check if profiling is active and we shall use profiling before
8508 prologue variant. If so sorry. */
8509 if (crtl->profile && flag_fentry != 0)
8510 sorry ("%<ms_hook_prologue%> attribute is not compatible "
8511 "with %<-mfentry%> for 32-bit");
8513 /* In ix86_asm_output_function_label we emitted:
8514 8b ff movl.s %edi,%edi
8515 55 push %ebp
8516 8b ec movl.s %esp,%ebp
8518 This matches the hookable function prologue in Win32 API
8519 functions in Microsoft Windows XP Service Pack 2 and newer.
8520 Wine uses this to enable Windows apps to hook the Win32 API
8521 functions provided by Wine.
8523 What that means is that we've already set up the frame pointer. */
8525 if (frame_pointer_needed
8526 && !(crtl->drap_reg && crtl->stack_realign_needed))
8528 rtx push, mov;
8530 /* We've decided to use the frame pointer already set up.
8531 Describe this to the unwinder by pretending that both
8532 push and mov insns happen right here.
8534 Putting the unwind info here at the end of the ms_hook
8535 is done so that we can make absolutely certain we get
8536 the required byte sequence at the start of the function,
8537 rather than relying on an assembler that can produce
8538 the exact encoding required.
8540 However it does mean (in the unpatched case) that we have
8541 a 1 insn window where the asynchronous unwind info is
8542 incorrect. However, if we placed the unwind info at
8543 its correct location we would have incorrect unwind info
8544 in the patched case. This is probably all moot, since
8545 I don't expect Wine to generate dwarf2 unwind info for the
8546 system libraries that use this feature. */
8548 insn = emit_insn (gen_blockage ());
8550 push = gen_push (hard_frame_pointer_rtx);
8551 mov = gen_rtx_SET (hard_frame_pointer_rtx,
8552 stack_pointer_rtx);
8553 RTX_FRAME_RELATED_P (push) = 1;
8554 RTX_FRAME_RELATED_P (mov) = 1;
8556 RTX_FRAME_RELATED_P (insn) = 1;
8557 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8558 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
8560 /* Note that gen_push incremented m->fs.cfa_offset, even
8561 though we didn't emit the push insn here. */
8562 m->fs.cfa_reg = hard_frame_pointer_rtx;
8563 m->fs.fp_offset = m->fs.cfa_offset;
8564 m->fs.fp_valid = true;
8566 else
8568 /* The frame pointer is not needed so pop %ebp again.
8569 This leaves us with a pristine state. */
8570 emit_insn (gen_pop (hard_frame_pointer_rtx));
8574 /* The first insn of a function that accepts its static chain on the
8575 stack is to push the register that would be filled in by a direct
8576 call. This insn will be skipped by the trampoline. */
8577 else if (ix86_static_chain_on_stack)
8579 static_chain = ix86_static_chain (cfun->decl, false);
8580 insn = emit_insn (gen_push (static_chain));
8581 emit_insn (gen_blockage ());
8583 /* We don't want to interpret this push insn as a register save,
8584 only as a stack adjustment. The real copy of the register as
8585 a save will be done later, if needed. */
8586 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
8587 t = gen_rtx_SET (stack_pointer_rtx, t);
8588 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8589 RTX_FRAME_RELATED_P (insn) = 1;
8592 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
8593 DRAP is needed and stack realignment is really needed after reload. */
8594 if (stack_realign_drap)
8596 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8598 /* Can't use DRAP in interrupt function. */
8599 if (cfun->machine->func_type != TYPE_NORMAL)
8600 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
8601 "in interrupt service routine. This may be worked "
8602 "around by avoiding functions with aggregate return.");
8604 /* Only need to push parameter pointer reg if it is caller saved. */
8605 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8607 /* Push arg pointer reg */
8608 insn = emit_insn (gen_push (crtl->drap_reg));
8609 RTX_FRAME_RELATED_P (insn) = 1;
8612 /* Grab the argument pointer. */
8613 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
8614 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8615 RTX_FRAME_RELATED_P (insn) = 1;
8616 m->fs.cfa_reg = crtl->drap_reg;
8617 m->fs.cfa_offset = 0;
8619 /* Align the stack. */
8620 insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
8621 GEN_INT (-align_bytes)));
8622 RTX_FRAME_RELATED_P (insn) = 1;
8624 /* Replicate the return address on the stack so that return
8625 address can be reached via (argp - 1) slot. This is needed
8626 to implement macro RETURN_ADDR_RTX and intrinsic function
8627 expand_builtin_return_addr etc. */
8628 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
8629 t = gen_frame_mem (word_mode, t);
8630 insn = emit_insn (gen_push (t));
8631 RTX_FRAME_RELATED_P (insn) = 1;
8633 /* For the purposes of frame and register save area addressing,
8634 we've started over with a new frame. */
8635 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8636 m->fs.realigned = true;
8638 if (static_chain)
8640 /* Replicate static chain on the stack so that static chain
8641 can be reached via (argp - 2) slot. This is needed for
8642 nested function with stack realignment. */
8643 insn = emit_insn (gen_push (static_chain));
8644 RTX_FRAME_RELATED_P (insn) = 1;
8648 int_registers_saved = (frame.nregs == 0);
8649 sse_registers_saved = (frame.nsseregs == 0);
8650 save_stub_call_needed = (m->call_ms2sysv);
8651 gcc_assert (sse_registers_saved || !save_stub_call_needed);
8653 if (frame_pointer_needed && !m->fs.fp_valid)
8655 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8656 slower on all targets. Also sdb didn't like it. */
8657 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8658 RTX_FRAME_RELATED_P (insn) = 1;
8660 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
8662 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8663 RTX_FRAME_RELATED_P (insn) = 1;
8665 if (m->fs.cfa_reg == stack_pointer_rtx)
8666 m->fs.cfa_reg = hard_frame_pointer_rtx;
8667 m->fs.fp_offset = m->fs.sp_offset;
8668 m->fs.fp_valid = true;
8672 if (!int_registers_saved)
8674 /* If saving registers via PUSH, do so now. */
8675 if (!frame.save_regs_using_mov)
8677 ix86_emit_save_regs ();
8678 int_registers_saved = true;
8679 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
8682 /* When using the red zone we may start register saving before allocating
8683 the stack frame, saving one cycle of the prologue. However, avoid
8684 doing this if we have to probe the stack; at least on x86_64 the
8685 stack probe can turn into a call that clobbers a red zone location. */
8686 else if (ix86_using_red_zone ()
8687 && (! TARGET_STACK_PROBE
8688 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
8690 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8691 cfun->machine->red_zone_used = true;
8692 int_registers_saved = true;
8696 if (frame.red_zone_size != 0)
8697 cfun->machine->red_zone_used = true;
8699 if (stack_realign_fp)
8701 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8702 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8704 /* Record last valid frame pointer offset. */
8705 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
8707 /* The computation of the size of the re-aligned stack frame means
8708 that we must allocate the size of the register save area before
8709 performing the actual alignment. Otherwise we cannot guarantee
8710 that there's enough storage above the realignment point. */
8711 allocate = frame.reg_save_offset - m->fs.sp_offset
8712 + frame.stack_realign_allocate;
8713 if (allocate)
8714 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8715 GEN_INT (-allocate), -1, false);
8717 /* Align the stack. */
8718 emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
8719 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
8720 m->fs.sp_realigned_offset = m->fs.sp_offset
8721 - frame.stack_realign_allocate;
8722 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
8723 Beyond this point, stack access should be done via choose_baseaddr or
8724 by using sp_valid_at and fp_valid_at to determine the correct base
8725 register. Henceforth, any CFA offset should be thought of as logical
8726 and not physical. */
8727 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
8728 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
8729 m->fs.sp_realigned = true;
8731 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
8732 is needed to describe where a register is saved using a realigned
8733 stack pointer, so we need to invalidate the stack pointer for that
8734 target. */
8735 if (TARGET_SEH)
8736 m->fs.sp_valid = false;
8738 /* If SP offset is non-immediate after allocation of the stack frame,
8739 then emit SSE saves or stub call prior to allocating the rest of the
8740 stack frame. This is less efficient for the out-of-line stub because
8741 we can't combine allocations across the call barrier, but it's better
8742 than using a scratch register. */
8743 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
8744 - m->fs.sp_realigned_offset),
8745 Pmode))
8747 if (!sse_registers_saved)
8749 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8750 sse_registers_saved = true;
8752 else if (save_stub_call_needed)
8754 ix86_emit_outlined_ms2sysv_save (frame);
8755 save_stub_call_needed = false;
8760 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
8762 if (flag_stack_usage_info)
8764 /* We start to count from ARG_POINTER. */
8765 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
8767 /* If it was realigned, take into account the fake frame. */
8768 if (stack_realign_drap)
8770 if (ix86_static_chain_on_stack)
8771 stack_size += UNITS_PER_WORD;
8773 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8774 stack_size += UNITS_PER_WORD;
8776 /* This over-estimates by 1 minimal-stack-alignment-unit but
8777 mitigates that by counting in the new return address slot. */
8778 current_function_dynamic_stack_size
8779 += crtl->stack_alignment_needed / BITS_PER_UNIT;
8782 current_function_static_stack_size = stack_size;
8785 /* On SEH target with very large frame size, allocate an area to save
8786 SSE registers (as the very large allocation won't be described). */
8787 if (TARGET_SEH
8788 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
8789 && !sse_registers_saved)
8791 HOST_WIDE_INT sse_size
8792 = frame.sse_reg_save_offset - frame.reg_save_offset;
8794 gcc_assert (int_registers_saved);
8796 /* No need to do stack checking as the area will be immediately
8797 written. */
8798 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8799 GEN_INT (-sse_size), -1,
8800 m->fs.cfa_reg == stack_pointer_rtx);
8801 allocate -= sse_size;
8802 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8803 sse_registers_saved = true;
8806 /* If stack clash protection is requested, then probe the stack, unless it
8807 is already probed on the target. */
8808 if (allocate >= 0
8809 && flag_stack_clash_protection
8810 && !ix86_target_stack_probe ())
8812 ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
8813 allocate = 0;
8816 /* The stack has already been decremented by the instruction calling us
8817 so probe if the size is non-negative to preserve the protection area. */
8818 else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8820 const HOST_WIDE_INT probe_interval = get_probe_interval ();
8822 if (STACK_CHECK_MOVING_SP)
8824 if (crtl->is_leaf
8825 && !cfun->calls_alloca
8826 && allocate <= probe_interval)
8829 else
8831 ix86_adjust_stack_and_probe (allocate, int_registers_saved, true);
8832 allocate = 0;
8836 else
8838 HOST_WIDE_INT size = allocate;
8840 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
8841 size = 0x80000000 - get_stack_check_protect () - 1;
8843 if (TARGET_STACK_PROBE)
8845 if (crtl->is_leaf && !cfun->calls_alloca)
8847 if (size > probe_interval)
8848 ix86_emit_probe_stack_range (0, size, int_registers_saved);
8850 else
8851 ix86_emit_probe_stack_range (0,
8852 size + get_stack_check_protect (),
8853 int_registers_saved);
8855 else
8857 if (crtl->is_leaf && !cfun->calls_alloca)
8859 if (size > probe_interval
8860 && size > get_stack_check_protect ())
8861 ix86_emit_probe_stack_range (get_stack_check_protect (),
8862 (size
8863 - get_stack_check_protect ()),
8864 int_registers_saved);
8866 else
8867 ix86_emit_probe_stack_range (get_stack_check_protect (), size,
8868 int_registers_saved);
8873 if (allocate == 0)
8875 else if (!ix86_target_stack_probe ()
8876 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
8878 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8879 GEN_INT (-allocate), -1,
8880 m->fs.cfa_reg == stack_pointer_rtx);
8882 else
8884 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8885 rtx r10 = NULL;
8886 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
8887 bool eax_live = ix86_eax_live_at_start_p ();
8888 bool r10_live = false;
8890 if (TARGET_64BIT)
8891 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
8893 if (eax_live)
8895 insn = emit_insn (gen_push (eax));
8896 allocate -= UNITS_PER_WORD;
8897 /* Note that SEH directives need to continue tracking the stack
8898 pointer even after the frame pointer has been set up. */
8899 if (sp_is_cfa_reg || TARGET_SEH)
8901 if (sp_is_cfa_reg)
8902 m->fs.cfa_offset += UNITS_PER_WORD;
8903 RTX_FRAME_RELATED_P (insn) = 1;
8904 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8905 gen_rtx_SET (stack_pointer_rtx,
8906 plus_constant (Pmode,
8907 stack_pointer_rtx,
8908 -UNITS_PER_WORD)));
8912 if (r10_live)
8914 r10 = gen_rtx_REG (Pmode, R10_REG);
8915 insn = emit_insn (gen_push (r10));
8916 allocate -= UNITS_PER_WORD;
8917 if (sp_is_cfa_reg || TARGET_SEH)
8919 if (sp_is_cfa_reg)
8920 m->fs.cfa_offset += UNITS_PER_WORD;
8921 RTX_FRAME_RELATED_P (insn) = 1;
8922 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8923 gen_rtx_SET (stack_pointer_rtx,
8924 plus_constant (Pmode,
8925 stack_pointer_rtx,
8926 -UNITS_PER_WORD)));
8930 emit_move_insn (eax, GEN_INT (allocate));
8931 emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
8933 /* Use the fact that AX still contains ALLOCATE. */
8934 insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
8935 (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
8937 if (sp_is_cfa_reg || TARGET_SEH)
8939 if (sp_is_cfa_reg)
8940 m->fs.cfa_offset += allocate;
8941 RTX_FRAME_RELATED_P (insn) = 1;
8942 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8943 gen_rtx_SET (stack_pointer_rtx,
8944 plus_constant (Pmode, stack_pointer_rtx,
8945 -allocate)));
8947 m->fs.sp_offset += allocate;
8949 /* Use stack_pointer_rtx for relative addressing so that code works for
8950 realigned stack. But this means that we need a blockage to prevent
8951 stores based on the frame pointer from being scheduled before. */
8952 if (r10_live && eax_live)
8954 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8955 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
8956 gen_frame_mem (word_mode, t));
8957 t = plus_constant (Pmode, t, UNITS_PER_WORD);
8958 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
8959 gen_frame_mem (word_mode, t));
8960 emit_insn (gen_memory_blockage ());
8962 else if (eax_live || r10_live)
8964 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8965 emit_move_insn (gen_rtx_REG (word_mode,
8966 (eax_live ? AX_REG : R10_REG)),
8967 gen_frame_mem (word_mode, t));
8968 emit_insn (gen_memory_blockage ());
8971 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
8973 /* If we haven't already set up the frame pointer, do so now. */
8974 if (frame_pointer_needed && !m->fs.fp_valid)
8976 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
8977 GEN_INT (frame.stack_pointer_offset
8978 - frame.hard_frame_pointer_offset));
8979 insn = emit_insn (insn);
8980 RTX_FRAME_RELATED_P (insn) = 1;
8981 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
8983 if (m->fs.cfa_reg == stack_pointer_rtx)
8984 m->fs.cfa_reg = hard_frame_pointer_rtx;
8985 m->fs.fp_offset = frame.hard_frame_pointer_offset;
8986 m->fs.fp_valid = true;
8989 if (!int_registers_saved)
8990 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8991 if (!sse_registers_saved)
8992 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8993 else if (save_stub_call_needed)
8994 ix86_emit_outlined_ms2sysv_save (frame);
8996 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
8997 in the prologue. */
8998 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
9000 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
9001 insn = emit_insn (gen_set_got (pic));
9002 RTX_FRAME_RELATED_P (insn) = 1;
9003 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
9004 emit_insn (gen_prologue_use (pic));
9005 /* Delete an already emitted SET_GOT if it exists and is allocated to
9006 REAL_PIC_OFFSET_TABLE_REGNUM. */
9007 ix86_elim_entry_set_got (pic);
9010 if (crtl->drap_reg && !crtl->stack_realign_needed)
9012 /* vDRAP was set up, but after reload it turns out stack realignment
9013 isn't necessary; here we emit the prologue to set up DRAP
9014 without the stack realignment adjustment. */
9015 t = choose_baseaddr (0, NULL);
9016 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
9019 /* Prevent instructions from being scheduled into register save push
9020 sequence when access to the redzone area is done through frame pointer.
9021 The offset between the frame pointer and the stack pointer is calculated
9022 relative to the value of the stack pointer at the end of the function
9023 prologue, and moving instructions that access redzone area via frame
9024 pointer inside push sequence violates this assumption. */
9025 if (frame_pointer_needed && frame.red_zone_size)
9026 emit_insn (gen_memory_blockage ());
9028 /* SEH requires that the prologue end within 256 bytes of the start of
9029 the function. Prevent instruction schedules that would extend that.
9030 Further, prevent alloca modifications to the stack pointer from being
9031 combined with prologue modifications. */
9032 if (TARGET_SEH)
9033 emit_insn (gen_prologue_use (stack_pointer_rtx));
9036 /* Emit code to restore REG using a POP insn. */
9038 static void
9039 ix86_emit_restore_reg_using_pop (rtx reg)
9041 struct machine_function *m = cfun->machine;
9042 rtx_insn *insn = emit_insn (gen_pop (reg));
9044 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9045 m->fs.sp_offset -= UNITS_PER_WORD;
9047 if (m->fs.cfa_reg == crtl->drap_reg
9048 && REGNO (reg) == REGNO (crtl->drap_reg))
9050 /* Previously we'd represented the CFA as an expression
9051 like *(%ebp - 8). We've just popped that value from
9052 the stack, which means we need to reset the CFA to
9053 the drap register. This will remain until we restore
9054 the stack pointer. */
9055 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9056 RTX_FRAME_RELATED_P (insn) = 1;
9058 /* This means that the DRAP register is valid for addressing too. */
9059 m->fs.drap_valid = true;
9060 return;
9063 if (m->fs.cfa_reg == stack_pointer_rtx)
9065 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9066 x = gen_rtx_SET (stack_pointer_rtx, x);
9067 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9068 RTX_FRAME_RELATED_P (insn) = 1;
9070 m->fs.cfa_offset -= UNITS_PER_WORD;
9073 /* When the frame pointer is the CFA, and we pop it, we are
9074 swapping back to the stack pointer as the CFA. This happens
9075 for stack frames that don't allocate other data, so we assume
9076 the stack pointer is now pointing at the return address, i.e.
9077 the function entry state, which makes the offset be 1 word. */
9078 if (reg == hard_frame_pointer_rtx)
9080 m->fs.fp_valid = false;
9081 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9083 m->fs.cfa_reg = stack_pointer_rtx;
9084 m->fs.cfa_offset -= UNITS_PER_WORD;
9086 add_reg_note (insn, REG_CFA_DEF_CFA,
9087 plus_constant (Pmode, stack_pointer_rtx,
9088 m->fs.cfa_offset));
9089 RTX_FRAME_RELATED_P (insn) = 1;
9094 /* Emit code to restore saved registers using POP insns. */
9096 static void
9097 ix86_emit_restore_regs_using_pop (void)
9099 unsigned int regno;
9101 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9102 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
9103 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
9106 /* Emit code and notes for the LEAVE instruction. If INSN is non-null,
9107 omit the emit and only attach the notes. */
9109 static void
9110 ix86_emit_leave (rtx_insn *insn)
9112 struct machine_function *m = cfun->machine;
9114 if (!insn)
9115 insn = emit_insn (gen_leave (word_mode));
9117 ix86_add_queued_cfa_restore_notes (insn);
9119 gcc_assert (m->fs.fp_valid);
9120 m->fs.sp_valid = true;
9121 m->fs.sp_realigned = false;
9122 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9123 m->fs.fp_valid = false;
9125 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9127 m->fs.cfa_reg = stack_pointer_rtx;
9128 m->fs.cfa_offset = m->fs.sp_offset;
9130 add_reg_note (insn, REG_CFA_DEF_CFA,
9131 plus_constant (Pmode, stack_pointer_rtx,
9132 m->fs.sp_offset));
9133 RTX_FRAME_RELATED_P (insn) = 1;
9135 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
9136 m->fs.fp_offset);
9139 /* Emit code to restore saved registers using MOV insns.
9140 First register is restored from CFA - CFA_OFFSET. */
9141 static void
9142 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9143 bool maybe_eh_return)
9145 struct machine_function *m = cfun->machine;
9146 unsigned int regno;
9148 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9149 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
9151 rtx reg = gen_rtx_REG (word_mode, regno);
9152 rtx mem;
9153 rtx_insn *insn;
9155 mem = choose_baseaddr (cfa_offset, NULL);
9156 mem = gen_frame_mem (word_mode, mem);
9157 insn = emit_move_insn (reg, mem);
9159 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9161 /* Previously we'd represented the CFA as an expression
9162 like *(%ebp - 8). We've just popped that value from
9163 the stack, which means we need to reset the CFA to
9164 the drap register. This will remain until we restore
9165 the stack pointer. */
9166 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9167 RTX_FRAME_RELATED_P (insn) = 1;
9169 /* This means that the DRAP register is valid for addressing. */
9170 m->fs.drap_valid = true;
9172 else
9173 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
9175 cfa_offset -= UNITS_PER_WORD;
9179 /* Emit code to restore saved SSE registers using MOV insns.
9180 First register is restored from CFA - CFA_OFFSET. */
9181 static void
9182 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
9183 bool maybe_eh_return)
9185 unsigned int regno;
9187 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9188 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
9190 rtx reg = gen_rtx_REG (V4SFmode, regno);
9191 rtx mem;
9192 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
9194 mem = choose_baseaddr (cfa_offset, &align);
9195 mem = gen_rtx_MEM (V4SFmode, mem);
9197 /* The location alignment depends upon the base register. */
9198 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
9199 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
9200 set_mem_align (mem, align);
9201 emit_insn (gen_rtx_SET (reg, mem));
9203 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
9205 cfa_offset -= GET_MODE_SIZE (V4SFmode);
9209 static void
9210 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
9211 bool use_call, int style)
9213 struct machine_function *m = cfun->machine;
9214 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
9215 + m->call_ms2sysv_extra_regs;
9216 rtvec v;
9217 unsigned int elems_needed, align, i, vi = 0;
9218 rtx_insn *insn;
9219 rtx sym, tmp;
9220 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
9221 rtx r10 = NULL_RTX;
9222 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
9223 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
9224 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
9225 rtx rsi_frame_load = NULL_RTX;
9226 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
9227 enum xlogue_stub stub;
9229 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
9231 /* If using a realigned stack, we should never start with padding. */
9232 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
9234 /* Setup RSI as the stub's base pointer. */
9235 align = GET_MODE_ALIGNMENT (V4SFmode);
9236 tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
9237 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
9239 emit_insn (gen_rtx_SET (rsi, tmp));
9241 /* Get a symbol for the stub. */
9242 if (frame_pointer_needed)
9243 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
9244 : XLOGUE_STUB_RESTORE_HFP_TAIL;
9245 else
9246 stub = use_call ? XLOGUE_STUB_RESTORE
9247 : XLOGUE_STUB_RESTORE_TAIL;
9248 sym = xlogue.get_stub_rtx (stub);
9250 elems_needed = ncregs;
9251 if (use_call)
9252 elems_needed += 1;
9253 else
9254 elems_needed += frame_pointer_needed ? 5 : 3;
9255 v = rtvec_alloc (elems_needed);
9257 /* We call the epilogue stub when we need to pop incoming args or we are
9258 doing a sibling call as the tail. Otherwise, we will emit a jmp to the
9259 epilogue stub and it is the tail-call. */
9260 if (use_call)
9261 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
9262 else
9264 RTVEC_ELT (v, vi++) = ret_rtx;
9265 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
9266 if (frame_pointer_needed)
9268 rtx rbp = gen_rtx_REG (DImode, BP_REG);
9269 gcc_assert (m->fs.fp_valid);
9270 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
9272 tmp = plus_constant (DImode, rbp, 8);
9273 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
9274 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
9275 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
9276 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
9278 else
9280 /* If no hard frame pointer, we set R10 to the SP restore value. */
9281 gcc_assert (!m->fs.fp_valid);
9282 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
9283 gcc_assert (m->fs.sp_valid);
9285 r10 = gen_rtx_REG (DImode, R10_REG);
9286 tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
9287 emit_insn (gen_rtx_SET (r10, tmp));
9289 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
9293 /* Generate frame load insns and restore notes. */
9294 for (i = 0; i < ncregs; ++i)
9296 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
9297 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
9298 rtx reg, frame_load;
9300 reg = gen_rtx_REG (mode, r.regno);
9301 frame_load = gen_frame_load (reg, rsi, r.offset);
9303 /* Save RSI frame load insn & note to add last. */
9304 if (r.regno == SI_REG)
9306 gcc_assert (!rsi_frame_load);
9307 rsi_frame_load = frame_load;
9308 rsi_restore_offset = r.offset;
9310 else
9312 RTVEC_ELT (v, vi++) = frame_load;
9313 ix86_add_cfa_restore_note (NULL, reg, r.offset);
9317 /* Add RSI frame load & restore note at the end. */
9318 gcc_assert (rsi_frame_load);
9319 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
9320 RTVEC_ELT (v, vi++) = rsi_frame_load;
9321 ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
9322 rsi_restore_offset);
9324 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
9325 if (!use_call && !frame_pointer_needed)
9327 gcc_assert (m->fs.sp_valid);
9328 gcc_assert (!m->fs.sp_realigned);
9330 /* At this point, R10 should point to frame.stack_realign_offset. */
9331 if (m->fs.cfa_reg == stack_pointer_rtx)
9332 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
9333 m->fs.sp_offset = frame.stack_realign_offset;
9336 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
9337 tmp = gen_rtx_PARALLEL (VOIDmode, v);
9338 if (use_call)
9339 insn = emit_insn (tmp);
9340 else
9342 insn = emit_jump_insn (tmp);
9343 JUMP_LABEL (insn) = ret_rtx;
9345 if (frame_pointer_needed)
9346 ix86_emit_leave (insn);
9347 else
9349 /* Need CFA adjust note. */
9350 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
9351 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
9355 RTX_FRAME_RELATED_P (insn) = true;
9356 ix86_add_queued_cfa_restore_notes (insn);
9358 /* If we're not doing a tail-call, we need to adjust the stack. */
9359 if (use_call && m->fs.sp_valid)
9361 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
9362 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9363 GEN_INT (dealloc), style,
9364 m->fs.cfa_reg == stack_pointer_rtx);
9368 /* Restore function stack, frame, and registers. */
9370 void
9371 ix86_expand_epilogue (int style)
9373 struct machine_function *m = cfun->machine;
9374 struct machine_frame_state frame_state_save = m->fs;
9375 bool restore_regs_via_mov;
9376 bool using_drap;
9377 bool restore_stub_is_tail = false;
9379 if (ix86_function_naked (current_function_decl))
9381 /* The program should not reach this point. */
9382 emit_insn (gen_ud2 ());
9383 return;
9386 ix86_finalize_stack_frame_flags ();
9387 const struct ix86_frame &frame = cfun->machine->frame;
9389 m->fs.sp_realigned = stack_realign_fp;
9390 m->fs.sp_valid = stack_realign_fp
9391 || !frame_pointer_needed
9392 || crtl->sp_is_unchanging;
9393 gcc_assert (!m->fs.sp_valid
9394 || m->fs.sp_offset == frame.stack_pointer_offset);
9396 /* The FP must be valid if the frame pointer is present. */
9397 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
9398 gcc_assert (!m->fs.fp_valid
9399 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
9401 /* We must have *some* valid pointer to the stack frame. */
9402 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
9404 /* The DRAP is never valid at this point. */
9405 gcc_assert (!m->fs.drap_valid);
9407 /* See the comment about red zone and frame
9408 pointer usage in ix86_expand_prologue. */
9409 if (frame_pointer_needed && frame.red_zone_size)
9410 emit_insn (gen_memory_blockage ());
9412 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
9413 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
9415 /* Determine the CFA offset of the end of the red-zone. */
9416 m->fs.red_zone_offset = 0;
9417 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
9419 /* The red zone begins below the return address, and below the
9420 error code in an exception handler. */
9421 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
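  /* On x86-64 the System V psABI reserves a 128-byte red zone below the
     stack pointer (RED_ZONE_SIZE); INCOMING_FRAME_SP_OFFSET covers the
     return address, plus the error code for an exception-handler entry. */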
9423 /* When the register save area is in the aligned portion of
9424 the stack, determine the maximum runtime displacement that
9425 matches up with the aligned frame. */
9426 if (stack_realign_drap)
9427 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
9428 + UNITS_PER_WORD);
9431 HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
9433 /* Special care must be taken for the normal return case of a function
9434 using eh_return: the eax and edx registers are marked as saved, but
9435 not restored along this path. Adjust the save location to match. */
9436 if (crtl->calls_eh_return && style != 2)
9437 reg_save_offset -= 2 * UNITS_PER_WORD;
9439 /* EH_RETURN requires the use of moves to function properly. */
9440 if (crtl->calls_eh_return)
9441 restore_regs_via_mov = true;
9442 /* SEH requires the use of pops to identify the epilogue. */
9443 else if (TARGET_SEH)
9444 restore_regs_via_mov = false;
9445 /* If we're only restoring one register and sp cannot be used, then
9446 use a move instruction to restore the register, since it's
9447 less work than reloading sp and popping the register. */
9448 else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
9449 restore_regs_via_mov = true;
9450 else if (TARGET_EPILOGUE_USING_MOVE
9451 && cfun->machine->use_fast_prologue_epilogue
9452 && (frame.nregs > 1
9453 || m->fs.sp_offset != reg_save_offset))
9454 restore_regs_via_mov = true;
9455 else if (frame_pointer_needed
9456 && !frame.nregs
9457 && m->fs.sp_offset != reg_save_offset)
9458 restore_regs_via_mov = true;
9459 else if (frame_pointer_needed
9460 && TARGET_USE_LEAVE
9461 && cfun->machine->use_fast_prologue_epilogue
9462 && frame.nregs == 1)
9463 restore_regs_via_mov = true;
9464 else
9465 restore_regs_via_mov = false;
9467 if (restore_regs_via_mov || frame.nsseregs)
9469 /* Ensure that the entire register save area is addressable via
9470 the stack pointer, if we will restore SSE regs via sp. */
9471 if (TARGET_64BIT
9472 && m->fs.sp_offset > 0x7fffffff
9473 && sp_valid_at (frame.stack_realign_offset + 1)
9474 && (frame.nsseregs + frame.nregs) != 0)
9476 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9477 GEN_INT (m->fs.sp_offset
9478 - frame.sse_reg_save_offset),
9479 style,
9480 m->fs.cfa_reg == stack_pointer_rtx);
9484 /* If there are any SSE registers to restore, then we have to do it
9485 via moves, since there's obviously no pop for SSE regs. */
9486 if (frame.nsseregs)
9487 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
9488 style == 2);
9490 if (m->call_ms2sysv)
9492 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
9494 /* We cannot use a tail-call for the stub if:
9495 1. We have to pop incoming args,
9496 2. We have additional int regs to restore, or
9497 3. A sibling call will be the tail-call, or
9498 4. We are emitting an eh_return_internal epilogue.
9500 TODO: Item 4 has not yet been tested!
9502 If any of the above are true, we will call the stub rather than
9503 jump to it. */
9504 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
9505 ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
9508 /* If using an out-of-line stub that is a tail-call, then... */
9509 if (m->call_ms2sysv && restore_stub_is_tail)
9511 /* TODO: paranoid tests. (remove eventually) */
9512 gcc_assert (m->fs.sp_valid);
9513 gcc_assert (!m->fs.sp_realigned);
9514 gcc_assert (!m->fs.fp_valid);
9515 gcc_assert (!m->fs.realigned);
9516 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
9517 gcc_assert (!crtl->drap_reg);
9518 gcc_assert (!frame.nregs);
9520 else if (restore_regs_via_mov)
9522 rtx t;
9524 if (frame.nregs)
9525 ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
9527 /* eh_return epilogues need %ecx added to the stack pointer. */
9528 if (style == 2)
9530 rtx sa = EH_RETURN_STACKADJ_RTX;
9531 rtx_insn *insn;
9533 /* Stack realignment doesn't work with eh_return. */
9534 if (crtl->stack_realign_needed)
9535 sorry ("Stack realignment not supported with "
9536 "%<__builtin_eh_return%>");
9538 /* regparm nested functions don't work with eh_return. */
9539 if (ix86_static_chain_on_stack)
9540 sorry ("regparm nested function not supported with "
9541 "%<__builtin_eh_return%>");
9543 if (frame_pointer_needed)
9545 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
9546 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
9547 emit_insn (gen_rtx_SET (sa, t));
9549 /* NB: eh_return epilogues must restore the frame pointer
9550 in word_mode since the upper 32 bits of RBP register
9551 can have any values. */
9552 t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
9553 rtx frame_reg = gen_rtx_REG (word_mode,
9554 HARD_FRAME_POINTER_REGNUM);
9555 insn = emit_move_insn (frame_reg, t);
9557 /* Note that we use SA as a temporary CFA, as the return
9558 address is at the proper place relative to it. We
9559 pretend this happens at the FP restore insn because
9560 prior to this insn the FP would be stored at the wrong
9561 offset relative to SA, and after this insn we have no
9562 other reasonable register to use for the CFA. We don't
9563 bother resetting the CFA to the SP for the duration of
9564 the return insn, unless the control flow instrumentation
9565 is done. In this case the SP is used later and we have
9566 to reset CFA to SP. */
9567 add_reg_note (insn, REG_CFA_DEF_CFA,
9568 plus_constant (Pmode, sa, UNITS_PER_WORD));
9569 ix86_add_queued_cfa_restore_notes (insn);
9570 add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
9571 RTX_FRAME_RELATED_P (insn) = 1;
9573 m->fs.cfa_reg = sa;
9574 m->fs.cfa_offset = UNITS_PER_WORD;
9575 m->fs.fp_valid = false;
9577 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
9578 const0_rtx, style,
9579 flag_cf_protection);
9581 else
9583 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
9584 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
9585 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
9586 ix86_add_queued_cfa_restore_notes (insn);
9588 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
9589 if (m->fs.cfa_offset != UNITS_PER_WORD)
9591 m->fs.cfa_offset = UNITS_PER_WORD;
9592 add_reg_note (insn, REG_CFA_DEF_CFA,
9593 plus_constant (Pmode, stack_pointer_rtx,
9594 UNITS_PER_WORD));
9595 RTX_FRAME_RELATED_P (insn) = 1;
9598 m->fs.sp_offset = UNITS_PER_WORD;
9599 m->fs.sp_valid = true;
9600 m->fs.sp_realigned = false;
9603 else
9605 /* SEH requires that the function end with (1) a stack adjustment
9606 if necessary, (2) a sequence of pops, and (3) a return or
9607 jump instruction. Prevent insns from the function body from
9608 being scheduled into this sequence. */
9609 if (TARGET_SEH)
9611 /* Prevent a catch region from being adjacent to the standard
9612 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
9613 nor several other flags that would be interesting to test are
9614 set up yet. */
9615 if (flag_non_call_exceptions)
9616 emit_insn (gen_nops (const1_rtx));
9617 else
9618 emit_insn (gen_blockage ());
9621 /* The first step is to deallocate the stack frame so that we can
9622 pop the registers. If the stack pointer was realigned, it needs
9623 to be restored now. Also do it on SEH targets for very large
9624 frames, as the emitted instructions aren't allowed by the ABI
9625 in epilogues. */
9626 if (!m->fs.sp_valid || m->fs.sp_realigned
9627 || (TARGET_SEH
9628 && (m->fs.sp_offset - reg_save_offset
9629 >= SEH_MAX_FRAME_SIZE)))
9631 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
9632 GEN_INT (m->fs.fp_offset
9633 - reg_save_offset),
9634 style, false);
9636 else if (m->fs.sp_offset != reg_save_offset)
9638 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9639 GEN_INT (m->fs.sp_offset
9640 - reg_save_offset),
9641 style,
9642 m->fs.cfa_reg == stack_pointer_rtx);
9645 ix86_emit_restore_regs_using_pop ();
9648 /* If we used a frame pointer and haven't already got rid of it,
9649 then do so now. */
9650 if (m->fs.fp_valid)
9652 /* If the stack pointer is valid and pointing at the frame
9653 pointer store address, then we only need a pop. */
9654 if (sp_valid_at (frame.hfp_save_offset)
9655 && m->fs.sp_offset == frame.hfp_save_offset)
9656 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
9657 /* Leave results in shorter dependency chains on CPUs that are
9658 able to grok it fast. */
9659 else if (TARGET_USE_LEAVE
9660 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
9661 || !cfun->machine->use_fast_prologue_epilogue)
9662 ix86_emit_leave (NULL);
9663 else
9665 pro_epilogue_adjust_stack (stack_pointer_rtx,
9666 hard_frame_pointer_rtx,
9667 const0_rtx, style, !using_drap);
9668 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
9672 if (using_drap)
9674 int param_ptr_offset = UNITS_PER_WORD;
9675 rtx_insn *insn;
9677 gcc_assert (stack_realign_drap);
9679 if (ix86_static_chain_on_stack)
9680 param_ptr_offset += UNITS_PER_WORD;
9681 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9682 param_ptr_offset += UNITS_PER_WORD;
9684 insn = emit_insn (gen_rtx_SET
9685 (stack_pointer_rtx,
9686 plus_constant (Pmode, crtl->drap_reg,
9687 -param_ptr_offset)));
9688 m->fs.cfa_reg = stack_pointer_rtx;
9689 m->fs.cfa_offset = param_ptr_offset;
9690 m->fs.sp_offset = param_ptr_offset;
9691 m->fs.realigned = false;
9693 add_reg_note (insn, REG_CFA_DEF_CFA,
9694 plus_constant (Pmode, stack_pointer_rtx,
9695 param_ptr_offset));
9696 RTX_FRAME_RELATED_P (insn) = 1;
9698 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9699 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
9702 /* At this point the stack pointer must be valid, and we must have
9703 restored all of the registers. We may not have deallocated the
9704 entire stack frame. We've delayed this until now because it may
9705 be possible to merge the local stack deallocation with the
9706 deallocation forced by ix86_static_chain_on_stack. */
9707 gcc_assert (m->fs.sp_valid);
9708 gcc_assert (!m->fs.sp_realigned);
9709 gcc_assert (!m->fs.fp_valid);
9710 gcc_assert (!m->fs.realigned);
9711 if (m->fs.sp_offset != UNITS_PER_WORD)
9713 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9714 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
9715 style, true);
9717 else
9718 ix86_add_queued_cfa_restore_notes (get_last_insn ());
9720 /* Sibcall epilogues don't want a return instruction. */
9721 if (style == 0)
9723 m->fs = frame_state_save;
9724 return;
9727 if (cfun->machine->func_type != TYPE_NORMAL)
9728 emit_jump_insn (gen_interrupt_return ());
9729 else if (crtl->args.pops_args && crtl->args.size)
9731 rtx popc = GEN_INT (crtl->args.pops_args);
9733 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
9734 address, do an explicit add, and jump indirectly to the caller. */
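/* As an illustrative sketch (hypothetical operand values, not literal
   output), the sequence emitted below for pops_args == N >= 64K is
   roughly:
	popl	%ecx		; fetch the return address
	addl	$N, %esp	; release the incoming arguments
	jmp	*%ecx		; return to the caller
   built from gen_pop, pro_epilogue_adjust_stack and
   gen_simple_return_indirect_internal.  */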
9736 if (crtl->args.pops_args >= 65536)
9738 rtx ecx = gen_rtx_REG (SImode, CX_REG);
9739 rtx_insn *insn;
9741 /* There is no "pascal" calling convention in any 64bit ABI. */
9742 gcc_assert (!TARGET_64BIT);
9744 insn = emit_insn (gen_pop (ecx));
9745 m->fs.cfa_offset -= UNITS_PER_WORD;
9746 m->fs.sp_offset -= UNITS_PER_WORD;
9748 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9749 x = gen_rtx_SET (stack_pointer_rtx, x);
9750 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9751 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9752 RTX_FRAME_RELATED_P (insn) = 1;
9754 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9755 popc, -1, true);
9756 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9758 else
9759 emit_jump_insn (gen_simple_return_pop_internal (popc));
9761 else if (!m->call_ms2sysv || !restore_stub_is_tail)
9763 /* In case of a return from EH, a simple return cannot be used,
9764 as the return address will be compared with a shadow stack
9765 return address. Use an indirect jump instead. */
9766 if (style == 2 && flag_cf_protection)
9768 /* Register used in indirect jump must be in word_mode. But
9769 Pmode may not be the same as word_mode for x32. */
9770 rtx ecx = gen_rtx_REG (word_mode, CX_REG);
9771 rtx_insn *insn;
9773 insn = emit_insn (gen_pop (ecx));
9774 m->fs.cfa_offset -= UNITS_PER_WORD;
9775 m->fs.sp_offset -= UNITS_PER_WORD;
9777 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9778 x = gen_rtx_SET (stack_pointer_rtx, x);
9779 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9780 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9781 RTX_FRAME_RELATED_P (insn) = 1;
9783 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9785 else
9786 emit_jump_insn (gen_simple_return_internal ());
9789 /* Restore the state back to the state from the prologue,
9790 so that it's correct for the next epilogue. */
9791 m->fs = frame_state_save;
9794 /* Reset from the function's potential modifications. */
9796 static void
9797 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
9799 if (pic_offset_table_rtx
9800 && !ix86_use_pseudo_pic_reg ())
9801 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
9803 if (TARGET_MACHO)
9805 rtx_insn *insn = get_last_insn ();
9806 rtx_insn *deleted_debug_label = NULL;
9808 /* Mach-O doesn't support labels at the end of objects, so if
9809 it looks like we might want one, take special action.
9810 First, collect any sequence of deleted debug labels. */
9811 while (insn
9812 && NOTE_P (insn)
9813 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9815 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
9816 notes only, instead set their CODE_LABEL_NUMBER to -1,
9817 otherwise there would be code generation differences
9818 between -g and -g0. */
9819 if (NOTE_P (insn) && NOTE_KIND (insn)
9820 == NOTE_INSN_DELETED_DEBUG_LABEL)
9821 deleted_debug_label = insn;
9822 insn = PREV_INSN (insn);
9825 /* If we have:
9826 label:
9827 barrier
9828 then this needs to be detected, so skip past the barrier. */
9830 if (insn && BARRIER_P (insn))
9831 insn = PREV_INSN (insn);
9833 /* Up to now we've only seen notes or barriers. */
9834 if (insn)
9836 if (LABEL_P (insn)
9837 || (NOTE_P (insn)
9838 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
9839 /* Trailing label. */
9840 fputs ("\tnop\n", file);
9841 else if (cfun && ! cfun->is_thunk)
9843 /* See if we have a completely empty function body, skipping
9844 the special case of the picbase thunk emitted as asm. */
9845 while (insn && ! INSN_P (insn))
9846 insn = PREV_INSN (insn);
9847 /* If we don't find any insns, we've got an empty function body,
9848 i.e. completely empty - without a return or branch. This is
9849 taken as the case where a function body has been removed
9850 because it contains an inline __builtin_unreachable(). GCC
9851 declares that reaching __builtin_unreachable() means UB so
9852 we're not obliged to do anything special; however, we want
9853 non-zero-sized function bodies. To meet this, and help the
9854 user out, let's trap the case. */
9855 if (insn == NULL)
9856 fputs ("\tud2\n", file);
9859 else if (deleted_debug_label)
9860 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
9861 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
9862 CODE_LABEL_NUMBER (insn) = -1;
9866 /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */
9868 void
9869 ix86_print_patchable_function_entry (FILE *file,
9870 unsigned HOST_WIDE_INT patch_area_size,
9871 bool record_p)
9873 if (cfun->machine->function_label_emitted)
9875 /* NB: When ix86_print_patchable_function_entry is called after
9876 the function label has been emitted, we have inserted or queued
9877 a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
9878 place. There is nothing to do here. */
9879 return;
9882 default_print_patchable_function_entry (file, patch_area_size,
9883 record_p);
9886 /* Output patchable area. NB: default_print_patchable_function_entry
9887 isn't available in i386.md. */
9889 void
9890 ix86_output_patchable_area (unsigned int patch_area_size,
9891 bool record_p)
9893 default_print_patchable_function_entry (asm_out_file,
9894 patch_area_size,
9895 record_p);
9898 /* Return a scratch register to use in the split stack prologue. The
9899 split stack prologue is used for -fsplit-stack. Its instructions
9900 come first in the function, even before the regular prologue.
9901 The scratch register can be any caller-saved register which is not
9902 used for parameters or for the static chain. */
9904 static unsigned int
9905 split_stack_prologue_scratch_regno (void)
9907 if (TARGET_64BIT)
9908 return R11_REG;
9909 else
9911 bool is_fastcall, is_thiscall;
9912 int regparm;
9914 is_fastcall = (lookup_attribute ("fastcall",
9915 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9916 != NULL);
9917 is_thiscall = (lookup_attribute ("thiscall",
9918 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9919 != NULL);
9920 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
9922 if (is_fastcall)
9924 if (DECL_STATIC_CHAIN (cfun->decl))
9926 sorry ("%<-fsplit-stack%> does not support fastcall with "
9927 "nested function");
9928 return INVALID_REGNUM;
9930 return AX_REG;
9932 else if (is_thiscall)
9934 if (!DECL_STATIC_CHAIN (cfun->decl))
9935 return DX_REG;
9936 return AX_REG;
9938 else if (regparm < 3)
9940 if (!DECL_STATIC_CHAIN (cfun->decl))
9941 return CX_REG;
9942 else
9944 if (regparm >= 2)
9946 sorry ("%<-fsplit-stack%> does not support 2 register "
9947 "parameters for a nested function");
9948 return INVALID_REGNUM;
9950 return DX_REG;
9953 else
9955 /* FIXME: We could make this work by pushing a register
9956 around the addition and comparison. */
9957 sorry ("%<-fsplit-stack%> does not support 3 register parameters");
9958 return INVALID_REGNUM;
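/* Roughly summarizing the cases above: in 64-bit mode the scratch
   register is always %r11; in 32-bit mode it is %eax for fastcall,
   %edx or %eax for thiscall, and usually %ecx or %edx when fewer than
   three register parameters are used; otherwise no caller-saved
   register is free and INVALID_REGNUM is returned after a sorry
   diagnostic.  */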
9963 /* A SYMBOL_REF for the function which allocates new stack space for
9964 -fsplit-stack. */
9966 static GTY(()) rtx split_stack_fn;
9968 /* A SYMBOL_REF for the function which allocates more stack when using
9969 the large code model. */
9971 static GTY(()) rtx split_stack_fn_large;
9973 /* Return location of the stack guard value in the TLS block. */
9976 ix86_split_stack_guard (void)
9978 int offset;
9979 addr_space_t as = DEFAULT_TLS_SEG_REG;
9980 rtx r;
9982 gcc_assert (flag_split_stack);
9984 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
9985 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
9986 #else
9987 gcc_unreachable ();
9988 #endif
9990 r = GEN_INT (offset);
9991 r = gen_const_mem (Pmode, r);
9992 set_mem_addr_space (r, as);
9994 return r;
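/* As a concrete illustration (the exact offsets come from the OS
   headers, so treat the numbers as assumptions): on glibc/Linux the
   guard is the __private_ss field of the TCB, and the comparison
   emitted by ix86_expand_split_stack_prologue below typically looks
   like
	cmpq	%fs:0x70, %rsp		# 64-bit
	cmpl	%gs:0x30, %esp		# 32-bit
   (or against a scratch register for large frames), with the offset
   taken from TARGET_THREAD_SPLIT_STACK_OFFSET.  */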
9997 /* Handle -fsplit-stack. These are the first instructions in the
9998 function, even before the regular prologue. */
10000 void
10001 ix86_expand_split_stack_prologue (void)
10003 HOST_WIDE_INT allocate;
10004 unsigned HOST_WIDE_INT args_size;
10005 rtx_code_label *label;
10006 rtx limit, current, allocate_rtx, call_fusage;
10007 rtx_insn *call_insn;
10008 rtx scratch_reg = NULL_RTX;
10009 rtx_code_label *varargs_label = NULL;
10010 rtx fn;
10012 gcc_assert (flag_split_stack && reload_completed);
10014 ix86_finalize_stack_frame_flags ();
10015 struct ix86_frame &frame = cfun->machine->frame;
10016 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10018 /* This is the label we will branch to if we have enough stack
10019 space. We expect the basic block reordering pass to reverse this
10020 branch if optimizing, so that we branch in the unlikely case. */
10021 label = gen_label_rtx ();
10023 /* We need to compare the stack pointer minus the frame size with
10024 the stack boundary in the TCB. The stack boundary always gives
10025 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10026 can compare directly. Otherwise we need to do an addition. */
10028 limit = ix86_split_stack_guard ();
10030 if (allocate < SPLIT_STACK_AVAILABLE)
10031 current = stack_pointer_rtx;
10032 else
10034 unsigned int scratch_regno;
10035 rtx offset;
10037 /* We need a scratch register to hold the stack pointer minus
10038 the required frame size. Since this is the very start of the
10039 function, the scratch register can be any caller-saved
10040 register which is not used for parameters. */
10041 offset = GEN_INT (- allocate);
10042 scratch_regno = split_stack_prologue_scratch_regno ();
10043 if (scratch_regno == INVALID_REGNUM)
10044 return;
10045 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10046 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10048 /* We don't use gen_add in this case because it will
10049 want to split to lea, but when not optimizing the insn
10050 will not be split after this point. */
10051 emit_insn (gen_rtx_SET (scratch_reg,
10052 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10053 offset)));
10055 else
10057 emit_move_insn (scratch_reg, offset);
10058 emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
10060 current = scratch_reg;
10063 ix86_expand_branch (GEU, current, limit, label);
10064 rtx_insn *jump_insn = get_last_insn ();
10065 JUMP_LABEL (jump_insn) = label;
10067 /* Mark the jump as very likely to be taken. */
10068 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
10070 if (split_stack_fn == NULL_RTX)
10072 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10073 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
10075 fn = split_stack_fn;
10077 /* Get more stack space. We pass in the desired stack space and the
10078 size of the arguments to copy to the new stack. In 32-bit mode
10079 we push the parameters; __morestack will return on a new stack
10080 anyhow. In 64-bit mode we pass the parameters in r10 and
10081 r11. */
10082 allocate_rtx = GEN_INT (allocate);
10083 args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
10084 call_fusage = NULL_RTX;
10085 rtx pop = NULL_RTX;
10086 if (TARGET_64BIT)
10088 rtx reg10, reg11;
10090 reg10 = gen_rtx_REG (Pmode, R10_REG);
10091 reg11 = gen_rtx_REG (Pmode, R11_REG);
10093 /* If this function uses a static chain, it will be in %r10.
10094 Preserve it across the call to __morestack. */
10095 if (DECL_STATIC_CHAIN (cfun->decl))
10097 rtx rax;
10099 rax = gen_rtx_REG (word_mode, AX_REG);
10100 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
10101 use_reg (&call_fusage, rax);
10104 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
10105 && !TARGET_PECOFF)
10107 HOST_WIDE_INT argval;
10109 gcc_assert (Pmode == DImode);
10110 /* When using the large model we need to load the address
10111 into a register, and we've run out of registers. So we
10112 switch to a different calling convention, and we call a
10113 different function: __morestack_large_model. We pass the
10114 argument size in the upper 32 bits of r10 and pass the
10115 frame size in the lower 32 bits. */
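/* For example (hypothetical values): with args_size == 0x20 and
   allocate == 0x1000, the computation below loads r10 with
   0x0000002000001000.  */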
10116 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
10117 gcc_assert ((args_size & 0xffffffff) == args_size);
10119 if (split_stack_fn_large == NULL_RTX)
10121 split_stack_fn_large
10122 = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
10123 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
10125 if (ix86_cmodel == CM_LARGE_PIC)
10127 rtx_code_label *label;
10128 rtx x;
10130 label = gen_label_rtx ();
10131 emit_label (label);
10132 LABEL_PRESERVE_P (label) = 1;
10133 emit_insn (gen_set_rip_rex64 (reg10, label));
10134 emit_insn (gen_set_got_offset_rex64 (reg11, label));
10135 emit_insn (gen_add2_insn (reg10, reg11));
10136 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
10137 UNSPEC_GOT);
10138 x = gen_rtx_CONST (Pmode, x);
10139 emit_move_insn (reg11, x);
10140 x = gen_rtx_PLUS (Pmode, reg10, reg11);
10141 x = gen_const_mem (Pmode, x);
10142 emit_move_insn (reg11, x);
10144 else
10145 emit_move_insn (reg11, split_stack_fn_large);
10147 fn = reg11;
10149 argval = ((args_size << 16) << 16) + allocate;
10150 emit_move_insn (reg10, GEN_INT (argval));
10152 else
10154 emit_move_insn (reg10, allocate_rtx);
10155 emit_move_insn (reg11, GEN_INT (args_size));
10156 use_reg (&call_fusage, reg11);
10159 use_reg (&call_fusage, reg10);
10161 else
10163 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
10164 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
10165 insn = emit_insn (gen_push (allocate_rtx));
10166 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
10167 pop = GEN_INT (2 * UNITS_PER_WORD);
10169 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
10170 GEN_INT (UNITS_PER_WORD), constm1_rtx,
10171 pop, false);
10172 add_function_usage_to (call_insn, call_fusage);
10173 if (!TARGET_64BIT)
10174 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
10175 /* Indicate that this function can't jump to non-local gotos. */
10176 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
10178 /* In order to make call/return prediction work right, we now need
10179 to execute a return instruction. See
10180 libgcc/config/i386/morestack.S for the details on how this works.
10182 For flow purposes gcc must not see this as a return
10183 instruction--we need control flow to continue at the subsequent
10184 label. Therefore, we use an unspec. */
10185 gcc_assert (crtl->args.pops_args < 65536);
10186 rtx_insn *ret_insn
10187 = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
10189 if ((flag_cf_protection & CF_BRANCH))
10191 /* Insert ENDBR since __morestack will jump back here via indirect
10192 call. */
10193 rtx cet_eb = gen_nop_endbr ();
10194 emit_insn_after (cet_eb, ret_insn);
10197 /* If we are in 64-bit mode and this function uses a static chain,
10198 we saved %r10 in %rax before calling __morestack. */
10199 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
10200 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
10201 gen_rtx_REG (word_mode, AX_REG));
10203 /* If this function calls va_start, we need to store a pointer to
10204 the arguments on the old stack, because they may not have been
10205 all copied to the new stack. At this point the old stack can be
10206 found at the frame pointer value used by __morestack, because
10207 __morestack has set that up before calling back to us. Here we
10208 store that pointer in a scratch register, and in
10209 ix86_expand_prologue we store the scratch register in a stack
10210 slot. */
10211 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10213 unsigned int scratch_regno;
10214 rtx frame_reg;
10215 int words;
10217 scratch_regno = split_stack_prologue_scratch_regno ();
10218 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10219 frame_reg = gen_rtx_REG (Pmode, BP_REG);
10221 /* 64-bit:
10222 fp -> old fp value
10223 return address within this function
10224 return address of caller of this function
10225 stack arguments
10226 So we add three words to get to the stack arguments.
10228 32-bit:
10229 fp -> old fp value
10230 return address within this function
10231 first argument to __morestack
10232 second argument to __morestack
10233 return address of caller of this function
10234 stack arguments
10235 So we add five words to get to the stack arguments.
10237 words = TARGET_64BIT ? 3 : 5;
10238 emit_insn (gen_rtx_SET (scratch_reg,
10239 plus_constant (Pmode, frame_reg,
10240 words * UNITS_PER_WORD)));
10242 varargs_label = gen_label_rtx ();
10243 emit_jump_insn (gen_jump (varargs_label));
10244 JUMP_LABEL (get_last_insn ()) = varargs_label;
10246 emit_barrier ();
10249 emit_label (label);
10250 LABEL_NUSES (label) = 1;
10252 /* If this function calls va_start, we now have to set the scratch
10253 register for the case where we do not call __morestack. In this
10254 case we need to set it based on the stack pointer. */
10255 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10257 emit_insn (gen_rtx_SET (scratch_reg,
10258 plus_constant (Pmode, stack_pointer_rtx,
10259 UNITS_PER_WORD)));
10261 emit_label (varargs_label);
10262 LABEL_NUSES (varargs_label) = 1;
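/* Putting the pieces above together, the emitted split-stack prologue
   has roughly this shape (a sketch, not literal output):
	cmp	<guard in TLS>, <sp or scratch reg>
	jae	.Lenough		# marked very likely taken
	<pass frame/argument sizes in r10/r11, or push them in 32-bit mode>
	call	__morestack
	ret				# split_stack_return unspec
   .Lenough:
	<regular prologue and function body>  */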
10266 /* We may have to tell the dataflow pass that the split stack prologue
10267 is initializing a scratch register. */
10269 static void
10270 ix86_live_on_entry (bitmap regs)
10272 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10274 gcc_assert (flag_split_stack);
10275 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
10279 /* Extract the parts of an RTL expression that is a valid memory address
10280 for an instruction. Return false if the structure of the address is
10281 grossly off. */
10283 bool
10284 ix86_decompose_address (rtx addr, struct ix86_address *out)
10286 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10287 rtx base_reg, index_reg;
10288 HOST_WIDE_INT scale = 1;
10289 rtx scale_rtx = NULL_RTX;
10290 rtx tmp;
10291 addr_space_t seg = ADDR_SPACE_GENERIC;
10293 /* Allow zero-extended SImode addresses,
10294 they will be emitted with addr32 prefix. */
10295 if (TARGET_64BIT && GET_MODE (addr) == DImode)
10297 if (GET_CODE (addr) == ZERO_EXTEND
10298 && GET_MODE (XEXP (addr, 0)) == SImode)
10300 addr = XEXP (addr, 0);
10301 if (CONST_INT_P (addr))
10302 return false;
10304 else if (GET_CODE (addr) == AND
10305 && const_32bit_mask (XEXP (addr, 1), DImode))
10307 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
10308 if (addr == NULL_RTX)
10309 return false;
10311 if (CONST_INT_P (addr))
10312 return false;
10314 else if (GET_CODE (addr) == AND)
10316 /* For an ASHIFT inside an AND, combine will not generate a
10317 canonical zero-extend. Merge the AND mask with the shift count
10318 to check whether it is a canonical zero-extend. */
10319 tmp = XEXP (addr, 0);
10320 rtx mask = XEXP (addr, 1);
10321 if (tmp && GET_CODE(tmp) == ASHIFT)
10323 rtx shift_val = XEXP (tmp, 1);
10324 if (CONST_INT_P (mask) && CONST_INT_P (shift_val)
10325 && (((unsigned HOST_WIDE_INT) INTVAL(mask)
10326 | ((HOST_WIDE_INT_1U << INTVAL(shift_val)) - 1))
10327 == 0xffffffff))
10329 addr = lowpart_subreg (SImode, XEXP (addr, 0),
10330 DImode);
10337 /* Allow SImode subregs of DImode addresses,
10338 they will be emitted with addr32 prefix. */
10339 if (TARGET_64BIT && GET_MODE (addr) == SImode)
10341 if (SUBREG_P (addr)
10342 && GET_MODE (SUBREG_REG (addr)) == DImode)
10344 addr = SUBREG_REG (addr);
10345 if (CONST_INT_P (addr))
10346 return false;
10350 if (REG_P (addr))
10351 base = addr;
10352 else if (SUBREG_P (addr))
10354 if (REG_P (SUBREG_REG (addr)))
10355 base = addr;
10356 else
10357 return false;
10359 else if (GET_CODE (addr) == PLUS)
10361 rtx addends[4], op;
10362 int n = 0, i;
10364 op = addr;
10367 if (n >= 4)
10368 return false;
10369 addends[n++] = XEXP (op, 1);
10370 op = XEXP (op, 0);
10372 while (GET_CODE (op) == PLUS);
10373 if (n >= 4)
10374 return false;
10375 addends[n] = op;
10377 for (i = n; i >= 0; --i)
10379 op = addends[i];
10380 switch (GET_CODE (op))
10382 case MULT:
10383 if (index)
10384 return false;
10385 index = XEXP (op, 0);
10386 scale_rtx = XEXP (op, 1);
10387 break;
10389 case ASHIFT:
10390 if (index)
10391 return false;
10392 index = XEXP (op, 0);
10393 tmp = XEXP (op, 1);
10394 if (!CONST_INT_P (tmp))
10395 return false;
10396 scale = INTVAL (tmp);
10397 if ((unsigned HOST_WIDE_INT) scale > 3)
10398 return false;
10399 scale = 1 << scale;
10400 break;
10402 case ZERO_EXTEND:
10403 op = XEXP (op, 0);
10404 if (GET_CODE (op) != UNSPEC)
10405 return false;
10406 /* FALLTHRU */
10408 case UNSPEC:
10409 if (XINT (op, 1) == UNSPEC_TP
10410 && TARGET_TLS_DIRECT_SEG_REFS
10411 && seg == ADDR_SPACE_GENERIC)
10412 seg = DEFAULT_TLS_SEG_REG;
10413 else
10414 return false;
10415 break;
10417 case SUBREG:
10418 if (!REG_P (SUBREG_REG (op)))
10419 return false;
10420 /* FALLTHRU */
10422 case REG:
10423 if (!base)
10424 base = op;
10425 else if (!index)
10426 index = op;
10427 else
10428 return false;
10429 break;
10431 case CONST:
10432 case CONST_INT:
10433 case SYMBOL_REF:
10434 case LABEL_REF:
10435 if (disp)
10436 return false;
10437 disp = op;
10438 break;
10440 default:
10441 return false;
10445 else if (GET_CODE (addr) == MULT)
10447 index = XEXP (addr, 0); /* index*scale */
10448 scale_rtx = XEXP (addr, 1);
10450 else if (GET_CODE (addr) == ASHIFT)
10452 /* We're called for lea too, which implements ashift on occasion. */
10453 index = XEXP (addr, 0);
10454 tmp = XEXP (addr, 1);
10455 if (!CONST_INT_P (tmp))
10456 return false;
10457 scale = INTVAL (tmp);
10458 if ((unsigned HOST_WIDE_INT) scale > 3)
10459 return false;
10460 scale = 1 << scale;
10462 else
10463 disp = addr; /* displacement */
10465 if (index)
10467 if (REG_P (index))
10469 else if (SUBREG_P (index)
10470 && REG_P (SUBREG_REG (index)))
10472 else
10473 return false;
10476 /* Extract the integral value of scale. */
10477 if (scale_rtx)
10479 if (!CONST_INT_P (scale_rtx))
10480 return false;
10481 scale = INTVAL (scale_rtx);
10484 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
10485 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
10487 /* Avoid useless 0 displacement. */
10488 if (disp == const0_rtx && (base || index))
10489 disp = NULL_RTX;
10491 /* Allow arg pointer and stack pointer as index if there is no scaling. */
10492 if (base_reg && index_reg && scale == 1
10493 && (REGNO (index_reg) == ARG_POINTER_REGNUM
10494 || REGNO (index_reg) == FRAME_POINTER_REGNUM
10495 || REGNO (index_reg) == SP_REG))
10497 std::swap (base, index);
10498 std::swap (base_reg, index_reg);
10501 /* Special case: %ebp cannot be encoded as a base without a displacement.
10502 Similarly %r13. */
10503 if (!disp && base_reg
10504 && (REGNO (base_reg) == ARG_POINTER_REGNUM
10505 || REGNO (base_reg) == FRAME_POINTER_REGNUM
10506 || REGNO (base_reg) == BP_REG
10507 || REGNO (base_reg) == R13_REG))
10508 disp = const0_rtx;
10510 /* Special case: on K6, [%esi] makes the instruction vector decoded.
10511 Avoid this by transforming to [%esi+0].
10512 Reload calls address legitimization without cfun defined, so we need
10513 to test cfun for being non-NULL. */
10514 if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun)
10515 && base_reg && !index_reg && !disp
10516 && REGNO (base_reg) == SI_REG)
10517 disp = const0_rtx;
10519 /* Special case: encode reg+reg instead of reg*2. */
10520 if (!base && index && scale == 2)
10521 base = index, base_reg = index_reg, scale = 1;
10523 /* Special case: scaling cannot be encoded without base or displacement. */
10524 if (!base && !disp && index && scale != 1)
10525 disp = const0_rtx;
10527 out->base = base;
10528 out->index = index;
10529 out->disp = disp;
10530 out->scale = scale;
10531 out->seg = seg;
10533 return true;
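/* As a worked example (hypothetical registers): the address
	(plus:DI (plus:DI (mult:DI (reg:DI %rbx) (const_int 4))
			  (reg:DI %rax))
		 (const_int 12))
   i.e. the memory operand of "movl 12(%rax,%rbx,4), ...", decomposes
   into base == %rax, index == %rbx, scale == 4, disp == 12 and
   seg == ADDR_SPACE_GENERIC.  */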
10536 /* Return cost of the memory address x.
10537 For i386, it is better to use a complex address than let gcc copy
10538 the address into a reg and make a new pseudo. But not if the address
10539 requires two regs - that would mean more pseudos with longer
10540 lifetimes. */
10541 static int
10542 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
10544 struct ix86_address parts;
10545 int cost = 1;
10546 int ok = ix86_decompose_address (x, &parts);
10548 gcc_assert (ok);
10550 if (parts.base && SUBREG_P (parts.base))
10551 parts.base = SUBREG_REG (parts.base);
10552 if (parts.index && SUBREG_P (parts.index))
10553 parts.index = SUBREG_REG (parts.index);
10555 /* Attempt to minimize the number of registers in the address by increasing
10556 the address cost for each register used. We don't increase the address
10557 cost for "pic_offset_table_rtx". When a mem op with "pic_offset_table_rtx"
10558 is not invariant itself, it most likely means that the base or index is not
10559 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
10560 which is not profitable for x86. */
10561 if (parts.base
10562 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10563 && (current_pass->type == GIMPLE_PASS
10564 || !pic_offset_table_rtx
10565 || !REG_P (parts.base)
10566 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
10567 cost++;
10569 if (parts.index
10570 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10571 && (current_pass->type == GIMPLE_PASS
10572 || !pic_offset_table_rtx
10573 || !REG_P (parts.index)
10574 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
10575 cost++;
10577 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10578 since its predecode logic can't detect the length of instructions
10579 and decoding degenerates to vector decoding. Increase cost of such
10580 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
10581 to split such addresses or even refuse such addresses at all.
10583 Following addressing modes are affected:
10584 [base+scale*index]
10585 [scale*index+disp]
10586 [base+index]
10588 The first and last cases may be avoidable by explicitly coding the zero
10589 into the memory address, but I don't have an AMD-K6 machine handy to
10590 check this theory. */
10592 if (TARGET_CPU_P (K6)
10593 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10594 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10595 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10596 cost += 10;
10598 return cost;
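/* For instance, under the rules above an address formed from two
   pseudo registers such as (plus (reg P1) (reg P2)) costs 3 (the base
   cost of 1 plus one per pseudo other than the PIC register), a
   single-pseudo address costs 2, and hard-register or purely constant
   addresses stay at cost 1, with the K6 penalty added on top for the
   affected addressing modes on that CPU.  */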
10601 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
10602 this is used to form addresses to local data when -fPIC is in
10603 use. */
10605 static bool
10606 darwin_local_data_pic (rtx disp)
10608 return (GET_CODE (disp) == UNSPEC
10609 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10612 /* True if the function symbol operand X should be loaded from GOT.
10613 If CALL_P is true, X is a call operand.
10615 NB: -mno-direct-extern-access doesn't force load from GOT for
10616 call.
10618 NB: In 32-bit mode, only non-PIC is allowed in inline assembly
10619 statements, since a PIC register could not be available at the
10620 call site. */
10622 bool
10623 ix86_force_load_from_GOT_p (rtx x, bool call_p)
10625 return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X))
10626 && !TARGET_PECOFF && !TARGET_MACHO
10627 && (!flag_pic || this_is_asm_operands)
10628 && ix86_cmodel != CM_LARGE
10629 && ix86_cmodel != CM_LARGE_PIC
10630 && GET_CODE (x) == SYMBOL_REF
10631 && ((!call_p
10632 && (!ix86_direct_extern_access
10633 || (SYMBOL_REF_DECL (x)
10634 && lookup_attribute ("nodirect_extern_access",
10635 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))
10636 || (SYMBOL_REF_FUNCTION_P (x)
10637 && (!flag_plt
10638 || (SYMBOL_REF_DECL (x)
10639 && lookup_attribute ("noplt",
10640 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))))
10641 && !SYMBOL_REF_LOCAL_P (x));
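/* As an illustration: on x86-64 with -fno-plt (or the "noplt"
   attribute) a call to an undefined function foo is expected to be
   emitted as
	call	*foo@GOTPCREL(%rip)
   instead of going through the PLT, and with -mno-direct-extern-access
   (or the "nodirect_extern_access" attribute) data references to
   undefined symbols are likewise routed through their GOT slots.  */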
10644 /* Determine if a given RTX is a valid constant. We already know this
10645 satisfies CONSTANT_P. */
10647 static bool
10648 ix86_legitimate_constant_p (machine_mode mode, rtx x)
10650 switch (GET_CODE (x))
10652 case CONST:
10653 x = XEXP (x, 0);
10655 if (GET_CODE (x) == PLUS)
10657 if (!CONST_INT_P (XEXP (x, 1)))
10658 return false;
10659 x = XEXP (x, 0);
10662 if (TARGET_MACHO && darwin_local_data_pic (x))
10663 return true;
10665 /* Only some unspecs are valid as "constants". */
10666 if (GET_CODE (x) == UNSPEC)
10667 switch (XINT (x, 1))
10669 case UNSPEC_GOT:
10670 case UNSPEC_GOTOFF:
10671 case UNSPEC_PLTOFF:
10672 return TARGET_64BIT;
10673 case UNSPEC_TPOFF:
10674 case UNSPEC_NTPOFF:
10675 x = XVECEXP (x, 0, 0);
10676 return (GET_CODE (x) == SYMBOL_REF
10677 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10678 case UNSPEC_DTPOFF:
10679 x = XVECEXP (x, 0, 0);
10680 return (GET_CODE (x) == SYMBOL_REF
10681 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10682 default:
10683 return false;
10686 /* We must have drilled down to a symbol. */
10687 if (GET_CODE (x) == LABEL_REF)
10688 return true;
10689 if (GET_CODE (x) != SYMBOL_REF)
10690 return false;
10691 /* FALLTHRU */
10693 case SYMBOL_REF:
10694 /* TLS symbols are never valid. */
10695 if (SYMBOL_REF_TLS_MODEL (x))
10696 return false;
10698 /* DLLIMPORT symbols are never valid. */
10699 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10700 && SYMBOL_REF_DLLIMPORT_P (x))
10701 return false;
10703 #if TARGET_MACHO
10704 /* mdynamic-no-pic */
10705 if (MACHO_DYNAMIC_NO_PIC_P)
10706 return machopic_symbol_defined_p (x);
10707 #endif
10709 /* External function address should be loaded
10710 via the GOT slot to avoid PLT. */
10711 if (ix86_force_load_from_GOT_p (x))
10712 return false;
10714 break;
10716 CASE_CONST_SCALAR_INT:
10717 if (ix86_endbr_immediate_operand (x, VOIDmode))
10718 return false;
10720 switch (mode)
10722 case E_TImode:
10723 if (TARGET_64BIT)
10724 return true;
10725 /* FALLTHRU */
10726 case E_OImode:
10727 case E_XImode:
10728 if (!standard_sse_constant_p (x, mode)
10729 && GET_MODE_SIZE (TARGET_AVX512F
10730 ? XImode
10731 : (TARGET_AVX
10732 ? OImode
10733 : (TARGET_SSE2
10734 ? TImode : DImode))) < GET_MODE_SIZE (mode))
10735 return false;
10736 default:
10737 break;
10739 break;
10741 case CONST_VECTOR:
10742 if (!standard_sse_constant_p (x, mode))
10743 return false;
10744 break;
10746 case CONST_DOUBLE:
10747 if (mode == E_BFmode)
10748 return false;
10750 default:
10751 break;
10754 /* Otherwise we handle everything else in the move patterns. */
10755 return true;
10758 /* Determine if it's legal to put X into the constant pool. This
10759 is not possible for the address of thread-local symbols, which
10760 is checked above. */
10762 static bool
10763 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
10765 /* We can put any immediate constant in memory. */
10766 switch (GET_CODE (x))
10768 CASE_CONST_ANY:
10769 return false;
10771 default:
10772 break;
10775 return !ix86_legitimate_constant_p (mode, x);
10778 /* Return true if the symbol is marked as dllimport, or as a stub variable,
10779 otherwise false. */
10781 static bool
10782 is_imported_p (rtx x)
10784 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
10785 || GET_CODE (x) != SYMBOL_REF)
10786 return false;
10788 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
10792 /* Nonzero if the constant value X is a legitimate general operand
10793 when generating PIC code. It is given that flag_pic is on and
10794 that X satisfies CONSTANT_P. */
10796 bool
10797 legitimate_pic_operand_p (rtx x)
10799 rtx inner;
10801 switch (GET_CODE (x))
10803 case CONST:
10804 inner = XEXP (x, 0);
10805 if (GET_CODE (inner) == PLUS
10806 && CONST_INT_P (XEXP (inner, 1)))
10807 inner = XEXP (inner, 0);
10809 /* Only some unspecs are valid as "constants". */
10810 if (GET_CODE (inner) == UNSPEC)
10811 switch (XINT (inner, 1))
10813 case UNSPEC_GOT:
10814 case UNSPEC_GOTOFF:
10815 case UNSPEC_PLTOFF:
10816 return TARGET_64BIT;
10817 case UNSPEC_TPOFF:
10818 x = XVECEXP (inner, 0, 0);
10819 return (GET_CODE (x) == SYMBOL_REF
10820 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10821 case UNSPEC_MACHOPIC_OFFSET:
10822 return legitimate_pic_address_disp_p (x);
10823 default:
10824 return false;
10826 /* FALLTHRU */
10828 case SYMBOL_REF:
10829 case LABEL_REF:
10830 return legitimate_pic_address_disp_p (x);
10832 default:
10833 return true;
10837 /* Determine if a given CONST RTX is a valid memory displacement
10838 in PIC mode. */
10840 bool
10841 legitimate_pic_address_disp_p (rtx disp)
10843 bool saw_plus;
10845 /* In 64bit mode we can allow direct addresses of symbols and labels
10846 when they are not dynamic symbols. */
10847 if (TARGET_64BIT)
10849 rtx op0 = disp, op1;
10851 switch (GET_CODE (disp))
10853 case LABEL_REF:
10854 return true;
10856 case CONST:
10857 if (GET_CODE (XEXP (disp, 0)) != PLUS)
10858 break;
10859 op0 = XEXP (XEXP (disp, 0), 0);
10860 op1 = XEXP (XEXP (disp, 0), 1);
10861 if (!CONST_INT_P (op1))
10862 break;
10863 if (GET_CODE (op0) == UNSPEC
10864 && (XINT (op0, 1) == UNSPEC_DTPOFF
10865 || XINT (op0, 1) == UNSPEC_NTPOFF)
10866 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
10867 return true;
10868 if (INTVAL (op1) >= 16*1024*1024
10869 || INTVAL (op1) < -16*1024*1024)
10870 break;
10871 if (GET_CODE (op0) == LABEL_REF)
10872 return true;
10873 if (GET_CODE (op0) == CONST
10874 && GET_CODE (XEXP (op0, 0)) == UNSPEC
10875 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
10876 return true;
10877 if (GET_CODE (op0) == UNSPEC
10878 && XINT (op0, 1) == UNSPEC_PCREL)
10879 return true;
10880 if (GET_CODE (op0) != SYMBOL_REF)
10881 break;
10882 /* FALLTHRU */
10884 case SYMBOL_REF:
10885 /* TLS references should always be enclosed in UNSPEC.
10886 A dllimported symbol always needs to be resolved. */
10887 if (SYMBOL_REF_TLS_MODEL (op0)
10888 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
10889 return false;
10891 if (TARGET_PECOFF)
10893 if (is_imported_p (op0))
10894 return true;
10896 if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
10897 break;
10899 /* Non-external-weak function symbols need to be resolved only
10900 for the large model. Non-external symbols don't need to be
10901 resolved for large and medium models. For the small model,
10902 we don't need to resolve anything here. */
10903 if ((ix86_cmodel != CM_LARGE_PIC
10904 && SYMBOL_REF_FUNCTION_P (op0)
10905 && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
10906 || !SYMBOL_REF_EXTERNAL_P (op0)
10907 || ix86_cmodel == CM_SMALL_PIC)
10908 return true;
10910 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
10911 && (SYMBOL_REF_LOCAL_P (op0)
10912 || ((ix86_direct_extern_access
10913 && !(SYMBOL_REF_DECL (op0)
10914 && lookup_attribute ("nodirect_extern_access",
10915 DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0)))))
10916 && HAVE_LD_PIE_COPYRELOC
10917 && flag_pie
10918 && !SYMBOL_REF_WEAK (op0)
10919 && !SYMBOL_REF_FUNCTION_P (op0)))
10920 && ix86_cmodel != CM_LARGE_PIC)
10921 return true;
10922 break;
10924 default:
10925 break;
10928 if (GET_CODE (disp) != CONST)
10929 return false;
10930 disp = XEXP (disp, 0);
10932 if (TARGET_64BIT)
10934 /* It is unsafe to allow PLUS expressions here; this limits the allowed
10935 distance of GOT tables. We should not need these anyway. */
10936 if (GET_CODE (disp) != UNSPEC
10937 || (XINT (disp, 1) != UNSPEC_GOTPCREL
10938 && XINT (disp, 1) != UNSPEC_GOTOFF
10939 && XINT (disp, 1) != UNSPEC_PCREL
10940 && XINT (disp, 1) != UNSPEC_PLTOFF))
10941 return false;
10943 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10944 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10945 return false;
10946 return true;
10949 saw_plus = false;
10950 if (GET_CODE (disp) == PLUS)
10952 if (!CONST_INT_P (XEXP (disp, 1)))
10953 return false;
10954 disp = XEXP (disp, 0);
10955 saw_plus = true;
10958 if (TARGET_MACHO && darwin_local_data_pic (disp))
10959 return true;
10961 if (GET_CODE (disp) != UNSPEC)
10962 return false;
10964 switch (XINT (disp, 1))
10966 case UNSPEC_GOT:
10967 if (saw_plus)
10968 return false;
10969 /* We need to check for both symbols and labels because VxWorks loads
10970 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10971 details. */
10972 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10973 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10974 case UNSPEC_GOTOFF:
10975 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10976 While the ABI also specifies a 32bit relocation, we don't produce it
10977 in the small PIC model at all. */
10978 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10979 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10980 && !TARGET_64BIT)
10981 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10982 return false;
10983 case UNSPEC_GOTTPOFF:
10984 case UNSPEC_GOTNTPOFF:
10985 case UNSPEC_INDNTPOFF:
10986 if (saw_plus)
10987 return false;
10988 disp = XVECEXP (disp, 0, 0);
10989 return (GET_CODE (disp) == SYMBOL_REF
10990 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10991 case UNSPEC_NTPOFF:
10992 disp = XVECEXP (disp, 0, 0);
10993 return (GET_CODE (disp) == SYMBOL_REF
10994 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10995 case UNSPEC_DTPOFF:
10996 disp = XVECEXP (disp, 0, 0);
10997 return (GET_CODE (disp) == SYMBOL_REF
10998 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11001 return false;
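/* Two commonly accepted shapes, as examples: in 32-bit PIC code a
   local symbol may be addressed as sym@GOTOFF(%ebx), an UNSPEC_GOTOFF
   displacement against the PIC register (classically %ebx), while a
   preemptible global is instead loaded from its GOT slot,
   sym@GOT(%ebx), an UNSPEC_GOT displacement.  */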
11004 /* Determine if OP is a suitable RTX for an address register.
11005 Return the naked register if a register or a register subreg is
11006 found, otherwise return NULL_RTX. */
11008 static rtx
11009 ix86_validate_address_register (rtx op)
11011 machine_mode mode = GET_MODE (op);
11013 /* Only SImode or DImode registers can form the address. */
11014 if (mode != SImode && mode != DImode)
11015 return NULL_RTX;
11017 if (REG_P (op))
11018 return op;
11019 else if (SUBREG_P (op))
11021 rtx reg = SUBREG_REG (op);
11023 if (!REG_P (reg))
11024 return NULL_RTX;
11026 mode = GET_MODE (reg);
11028 /* Don't allow SUBREGs that span more than a word. It can
11029 lead to spill failures when the register is one word out
11030 of a two word structure. */
11031 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11032 return NULL_RTX;
11034 /* Allow only SUBREGs of non-eliminable hard registers. */
11035 if (register_no_elim_operand (reg, mode))
11036 return reg;
11039 /* Op is not a register. */
11040 return NULL_RTX;
11043 /* Recognizes RTL expressions that are valid memory addresses for an
11044 instruction. The MODE argument is the machine mode for the MEM
11045 expression that wants to use this address.
11047 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
11048 convert common non-canonical forms to canonical form so that they will
11049 be recognized. */
11051 static bool
11052 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
11053 code_helper = ERROR_MARK)
11055 struct ix86_address parts;
11056 rtx base, index, disp;
11057 HOST_WIDE_INT scale;
11058 addr_space_t seg;
11060 if (ix86_decompose_address (addr, &parts) == 0)
11061 /* Decomposition failed. */
11062 return false;
11064 base = parts.base;
11065 index = parts.index;
11066 disp = parts.disp;
11067 scale = parts.scale;
11068 seg = parts.seg;
11070 /* Validate base register. */
11071 if (base)
11073 rtx reg = ix86_validate_address_register (base);
11075 if (reg == NULL_RTX)
11076 return false;
11078 unsigned int regno = REGNO (reg);
11079 if ((strict && !REGNO_OK_FOR_BASE_P (regno))
11080 || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno)))
11081 /* Base is not valid. */
11082 return false;
11085 /* Validate index register. */
11086 if (index)
11088 rtx reg = ix86_validate_address_register (index);
11090 if (reg == NULL_RTX)
11091 return false;
11093 unsigned int regno = REGNO (reg);
11094 if ((strict && !REGNO_OK_FOR_INDEX_P (regno))
11095 || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno)))
11096 /* Index is not valid. */
11097 return false;
11100 /* Index and base should have the same mode. */
11101 if (base && index
11102 && GET_MODE (base) != GET_MODE (index))
11103 return false;
11105 /* Address override works only on the (%reg) part of %fs:(%reg). */
11106 if (seg != ADDR_SPACE_GENERIC
11107 && ((base && GET_MODE (base) != word_mode)
11108 || (index && GET_MODE (index) != word_mode)))
11109 return false;
11111 /* Validate scale factor. */
11112 if (scale != 1)
11114 if (!index)
11115 /* Scale without index. */
11116 return false;
11118 if (scale != 2 && scale != 4 && scale != 8)
11119 /* Scale is not a valid multiplier. */
11120 return false;
11123 /* Validate displacement. */
11124 if (disp)
11126 if (ix86_endbr_immediate_operand (disp, VOIDmode))
11127 return false;
11129 if (GET_CODE (disp) == CONST
11130 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11131 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11132 switch (XINT (XEXP (disp, 0), 1))
11134 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
11135 when used. While the ABI also specifies 32bit relocations, we
11136 don't produce them at all and use IP-relative addressing instead.
11137 Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
11138 should be loaded via the GOT. */
11139 case UNSPEC_GOT:
11140 if (!TARGET_64BIT
11141 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
11142 goto is_legitimate_pic;
11143 /* FALLTHRU */
11144 case UNSPEC_GOTOFF:
11145 gcc_assert (flag_pic);
11146 if (!TARGET_64BIT)
11147 goto is_legitimate_pic;
11149 /* 64bit address unspec. */
11150 return false;
11152 case UNSPEC_GOTPCREL:
11153 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
11154 goto is_legitimate_pic;
11155 /* FALLTHRU */
11156 case UNSPEC_PCREL:
11157 gcc_assert (flag_pic);
11158 goto is_legitimate_pic;
11160 case UNSPEC_GOTTPOFF:
11161 case UNSPEC_GOTNTPOFF:
11162 case UNSPEC_INDNTPOFF:
11163 case UNSPEC_NTPOFF:
11164 case UNSPEC_DTPOFF:
11165 break;
11167 default:
11168 /* Invalid address unspec. */
11169 return false;
11172 else if (SYMBOLIC_CONST (disp)
11173 && (flag_pic
11174 #if TARGET_MACHO
11175 || (MACHOPIC_INDIRECT
11176 && !machopic_operand_p (disp))
11177 #endif
11181 is_legitimate_pic:
11182 if (TARGET_64BIT && (index || base))
11184 /* foo@dtpoff(%rX) is ok. */
11185 if (GET_CODE (disp) != CONST
11186 || GET_CODE (XEXP (disp, 0)) != PLUS
11187 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11188 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11189 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11190 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11191 /* Non-constant pic memory reference. */
11192 return false;
11194 else if ((!TARGET_MACHO || flag_pic)
11195 && ! legitimate_pic_address_disp_p (disp))
11196 /* Displacement is an invalid pic construct. */
11197 return false;
11198 #if TARGET_MACHO
11199 else if (MACHO_DYNAMIC_NO_PIC_P
11200 && !ix86_legitimate_constant_p (Pmode, disp))
11201 /* displacement must be referenced via non_lazy_pointer */
11202 return false;
11203 #endif
11205 /* This code used to verify that a symbolic pic displacement
11206 includes the pic_offset_table_rtx register.
11208 While this is a good idea, unfortunately these constructs may
11209 be created by the "adds using lea" optimization for incorrect
11210 code like:
11212 int a;
11213 int foo(int i)
11215 return *(&a+i);
11218 This code is nonsensical, but results in addressing the
11219 GOT table with a pic_offset_table_rtx base. We can't
11220 just refuse it easily, since it gets matched by the
11221 "addsi3" pattern, which later gets split to lea when the
11222 output register differs from the input. While this
11223 could be handled by a separate addsi pattern for this case
11224 that never results in lea, disabling this test seems to be
11225 the easier and correct fix for the crash. */
11227 else if (GET_CODE (disp) != LABEL_REF
11228 && !CONST_INT_P (disp)
11229 && (GET_CODE (disp) != CONST
11230 || !ix86_legitimate_constant_p (Pmode, disp))
11231 && (GET_CODE (disp) != SYMBOL_REF
11232 || !ix86_legitimate_constant_p (Pmode, disp)))
11233 /* Displacement is not constant. */
11234 return false;
11235 else if (TARGET_64BIT
11236 && !x86_64_immediate_operand (disp, VOIDmode))
11237 /* Displacement is out of range. */
11238 return false;
11239 /* In x32 mode, constant addresses are sign extended to 64bit, so
11240 we have to prevent addresses from 0x80000000 to 0xffffffff. */
11241 else if (TARGET_X32 && !(index || base)
11242 && CONST_INT_P (disp)
11243 && val_signbit_known_set_p (SImode, INTVAL (disp)))
11244 return false;
11247 /* Everything looks valid. */
11248 return true;
11251 /* Determine if a given RTX is a valid constant address. */
11253 bool
11254 constant_address_p (rtx x)
11256 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11259 /* Return a unique alias set for the GOT. */
11261 alias_set_type
11262 ix86_GOT_alias_set (void)
11264 static alias_set_type set = -1;
11265 if (set == -1)
11266 set = new_alias_set ();
11267 return set;
11270 /* Return a legitimate reference for ORIG (an address) using the
11271 register REG. If REG is 0, a new pseudo is generated.
11273 There are two types of references that must be handled:
11275 1. Global data references must load the address from the GOT, via
11276 the PIC reg. An insn is emitted to do this load, and the reg is
11277 returned.
11279 2. Static data references, constant pool addresses, and code labels
11280 compute the address as an offset from the GOT, whose base is in
11281 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11282 differentiate them from global data objects. The returned
11283 address is the PIC reg + an unspec constant.
11285 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11286 reg also appears in the address. */
11289 legitimize_pic_address (rtx orig, rtx reg)
11291 rtx addr = orig;
11292 rtx new_rtx = orig;
11294 #if TARGET_MACHO
11295 if (TARGET_MACHO && !TARGET_64BIT)
11297 if (reg == 0)
11298 reg = gen_reg_rtx (Pmode);
11299 /* Use the generic Mach-O PIC machinery. */
11300 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11302 #endif
11304 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11306 rtx tmp = legitimize_pe_coff_symbol (addr, true);
11307 if (tmp)
11308 return tmp;
11311 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11312 new_rtx = addr;
11313 else if ((!TARGET_64BIT
11314 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
11315 && !TARGET_PECOFF
11316 && gotoff_operand (addr, Pmode))
11318 /* This symbol may be referenced via a displacement
11319 from the PIC base address (@GOTOFF). */
11320 if (GET_CODE (addr) == CONST)
11321 addr = XEXP (addr, 0);
11323 if (GET_CODE (addr) == PLUS)
11325 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11326 UNSPEC_GOTOFF);
11327 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11329 else
11330 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11332 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11334 if (TARGET_64BIT)
11335 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11337 if (reg != 0)
11339 gcc_assert (REG_P (reg));
11340 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
11341 new_rtx, reg, 1, OPTAB_DIRECT);
11343 else
11344 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11346 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11347 /* We can't always use @GOTOFF for text labels
11348 on VxWorks, see gotoff_operand. */
11349 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11351 rtx tmp = legitimize_pe_coff_symbol (addr, true);
11352 if (tmp)
11353 return tmp;
11355 /* For x64 PE-COFF there is no GOT table,
11356 so we use the address directly. */
11357 if (TARGET_64BIT && TARGET_PECOFF)
11359 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
11360 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11362 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11364 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
11365 UNSPEC_GOTPCREL);
11366 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11367 new_rtx = gen_const_mem (Pmode, new_rtx);
11368 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11370 else
11372 /* This symbol must be referenced via a load
11373 from the Global Offset Table (@GOT). */
11374 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11375 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11377 if (TARGET_64BIT)
11378 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11380 if (reg != 0)
11382 gcc_assert (REG_P (reg));
11383 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
11384 new_rtx, reg, 1, OPTAB_DIRECT);
11386 else
11387 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11389 new_rtx = gen_const_mem (Pmode, new_rtx);
11390 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11393 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11395 else
11397 if (CONST_INT_P (addr)
11398 && !x86_64_immediate_operand (addr, VOIDmode))
11399 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
11400 else if (GET_CODE (addr) == CONST)
11402 addr = XEXP (addr, 0);
11404 /* We must match stuff we generate before. Assume the only
11405 unspecs that can get here are ours. Not that we could do
11406 anything with them anyway.... */
11407 if (GET_CODE (addr) == UNSPEC
11408 || (GET_CODE (addr) == PLUS
11409 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11410 return orig;
11411 gcc_assert (GET_CODE (addr) == PLUS);
11414 if (GET_CODE (addr) == PLUS)
11416 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11418 /* Check first to see if this is a constant
11419 offset from a @GOTOFF symbol reference. */
11420 if (!TARGET_PECOFF
11421 && gotoff_operand (op0, Pmode)
11422 && CONST_INT_P (op1))
11424 if (!TARGET_64BIT)
11426 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11427 UNSPEC_GOTOFF);
11428 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11429 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11431 if (reg != 0)
11433 gcc_assert (REG_P (reg));
11434 new_rtx = expand_simple_binop (Pmode, PLUS,
11435 pic_offset_table_rtx,
11436 new_rtx, reg, 1,
11437 OPTAB_DIRECT);
11439 else
11440 new_rtx
11441 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11443 else
11445 if (INTVAL (op1) < -16*1024*1024
11446 || INTVAL (op1) >= 16*1024*1024)
11448 if (!x86_64_immediate_operand (op1, Pmode))
11449 op1 = force_reg (Pmode, op1);
11451 new_rtx
11452 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11456 else
11458 rtx base = legitimize_pic_address (op0, reg);
11459 machine_mode mode = GET_MODE (base);
11460 new_rtx
11461 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
11463 if (CONST_INT_P (new_rtx))
11465 if (INTVAL (new_rtx) < -16*1024*1024
11466 || INTVAL (new_rtx) >= 16*1024*1024)
11468 if (!x86_64_immediate_operand (new_rtx, mode))
11469 new_rtx = force_reg (mode, new_rtx);
11471 new_rtx
11472 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
11474 else
11475 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
11477 else
11479 /* For %rip addressing, we have to use
11480 just disp32, not base nor index. */
11481 if (TARGET_64BIT
11482 && (GET_CODE (base) == SYMBOL_REF
11483 || GET_CODE (base) == LABEL_REF))
11484 base = force_reg (mode, base);
11485 if (GET_CODE (new_rtx) == PLUS
11486 && CONSTANT_P (XEXP (new_rtx, 1)))
11488 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
11489 new_rtx = XEXP (new_rtx, 1);
11491 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
11496 return new_rtx;
11499 /* Load the thread pointer. If TO_REG is true, force it into a register. */
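/* Illustrative note (not from the original sources): the UNSPEC_TP built
   below is what later materializes as the TLS segment base (typically
   %fs on 64-bit and %gs on 32-bit GNU/Linux targets), so that
   "thread pointer + offset" addresses can be emitted as plain
   %fs:offset / %gs:offset references.  */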
11501 static rtx
11502 get_thread_pointer (machine_mode tp_mode, bool to_reg)
11504 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11506 if (GET_MODE (tp) != tp_mode)
11508 gcc_assert (GET_MODE (tp) == SImode);
11509 gcc_assert (tp_mode == DImode);
11511 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
11514 if (to_reg)
11515 tp = copy_to_mode_reg (tp_mode, tp);
11517 return tp;
11520 /* Construct the SYMBOL_REF for the tls_get_addr function. */
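/* Illustrative note: with GNU TLS on 32-bit targets the symbol chosen
   below is "___tls_get_addr" (three underscores), the variant that takes
   its argument in a register instead of on the stack; other
   configurations use the standard "__tls_get_addr" helper.  */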
11522 static GTY(()) rtx ix86_tls_symbol;
11524 static rtx
11525 ix86_tls_get_addr (void)
11527 if (!ix86_tls_symbol)
11529 const char *sym
11530 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
11531 ? "___tls_get_addr" : "__tls_get_addr");
11533 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
11536 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
11538 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
11539 UNSPEC_PLTOFF);
11540 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
11541 gen_rtx_CONST (Pmode, unspec));
11544 return ix86_tls_symbol;
11547 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
11549 static GTY(()) rtx ix86_tls_module_base_symbol;
11552 ix86_tls_module_base (void)
11554 if (!ix86_tls_module_base_symbol)
11556 ix86_tls_module_base_symbol
11557 = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
11559 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
11560 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
11563 return ix86_tls_module_base_symbol;
11566 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11567 false if we expect this to be used for a memory address and true if
11568 we expect to load the address into a register. */
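/* A rough sketch of the sequences produced below on a typical GNU/Linux
   configuration (exact output depends on flags and assembler support):
     global dynamic: lea x@tlsgd(%rip),%rdi; call __tls_get_addr
     local dynamic:  one __tls_get_addr call for _TLS_MODULE_BASE_, then
                     per-symbol x@dtpoff displacements
     initial exec:   mov x@gottpoff(%rip),%reg plus the thread pointer
     local exec:     a constant x@tpoff offset from the thread pointer
   The TARGET_GNU2_TLS paths use the TLS descriptor scheme
   (tls_dynamic_gnu2_* patterns) instead of calling __tls_get_addr
   directly.  */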
11571 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
11573 rtx dest, base, off;
11574 rtx pic = NULL_RTX, tp = NULL_RTX;
11575 machine_mode tp_mode = Pmode;
11576 int type;
11578 /* Fall back to the global dynamic model if the toolchain cannot support local
11579 dynamic. */
11580 if (TARGET_SUN_TLS && !TARGET_64BIT
11581 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
11582 && model == TLS_MODEL_LOCAL_DYNAMIC)
11583 model = TLS_MODEL_GLOBAL_DYNAMIC;
11585 switch (model)
11587 case TLS_MODEL_GLOBAL_DYNAMIC:
11588 if (!TARGET_64BIT)
11590 if (flag_pic && !TARGET_PECOFF)
11591 pic = pic_offset_table_rtx;
11592 else
11594 pic = gen_reg_rtx (Pmode);
11595 emit_insn (gen_set_got (pic));
11599 if (TARGET_GNU2_TLS)
11601 dest = gen_reg_rtx (ptr_mode);
11602 if (TARGET_64BIT)
11603 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
11604 else
11605 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
11607 tp = get_thread_pointer (ptr_mode, true);
11608 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
11609 if (GET_MODE (dest) != Pmode)
11610 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
11611 dest = force_reg (Pmode, dest);
11613 if (GET_MODE (x) != Pmode)
11614 x = gen_rtx_ZERO_EXTEND (Pmode, x);
11616 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
11618 else
11620 rtx caddr = ix86_tls_get_addr ();
11622 dest = gen_reg_rtx (Pmode);
11623 if (TARGET_64BIT)
11625 rtx rax = gen_rtx_REG (Pmode, AX_REG);
11626 rtx_insn *insns;
11628 start_sequence ();
11629 emit_call_insn
11630 (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
11631 insns = get_insns ();
11632 end_sequence ();
11634 if (GET_MODE (x) != Pmode)
11635 x = gen_rtx_ZERO_EXTEND (Pmode, x);
11637 RTL_CONST_CALL_P (insns) = 1;
11638 emit_libcall_block (insns, dest, rax, x);
11640 else
11641 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
11643 break;
11645 case TLS_MODEL_LOCAL_DYNAMIC:
11646 if (!TARGET_64BIT)
11648 if (flag_pic)
11649 pic = pic_offset_table_rtx;
11650 else
11652 pic = gen_reg_rtx (Pmode);
11653 emit_insn (gen_set_got (pic));
11657 if (TARGET_GNU2_TLS)
11659 rtx tmp = ix86_tls_module_base ();
11661 base = gen_reg_rtx (ptr_mode);
11662 if (TARGET_64BIT)
11663 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
11664 else
11665 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
11667 tp = get_thread_pointer (ptr_mode, true);
11668 if (GET_MODE (base) != Pmode)
11669 base = gen_rtx_ZERO_EXTEND (Pmode, base);
11670 base = force_reg (Pmode, base);
11672 else
11674 rtx caddr = ix86_tls_get_addr ();
11676 base = gen_reg_rtx (Pmode);
11677 if (TARGET_64BIT)
11679 rtx rax = gen_rtx_REG (Pmode, AX_REG);
11680 rtx_insn *insns;
11681 rtx eqv;
11683 start_sequence ();
11684 emit_call_insn
11685 (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
11686 insns = get_insns ();
11687 end_sequence ();
11689 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
11690 share the LD_BASE result with other LD model accesses. */
11691 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11692 UNSPEC_TLS_LD_BASE);
11694 RTL_CONST_CALL_P (insns) = 1;
11695 emit_libcall_block (insns, base, rax, eqv);
11697 else
11698 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
11701 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11702 off = gen_rtx_CONST (Pmode, off);
11704 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11706 if (TARGET_GNU2_TLS)
11708 if (GET_MODE (tp) != Pmode)
11710 dest = lowpart_subreg (ptr_mode, dest, Pmode);
11711 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
11712 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
11714 else
11715 dest = gen_rtx_PLUS (Pmode, tp, dest);
11716 dest = force_reg (Pmode, dest);
11718 if (GET_MODE (x) != Pmode)
11719 x = gen_rtx_ZERO_EXTEND (Pmode, x);
11721 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
11723 break;
11725 case TLS_MODEL_INITIAL_EXEC:
11726 if (TARGET_64BIT)
11728 if (TARGET_SUN_TLS && !TARGET_X32)
11730 /* The Sun linker took the AMD64 TLS spec literally
11731 and can only handle %rax as the destination of the
11732 initial executable code sequence. */
11734 dest = gen_reg_rtx (DImode);
11735 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
11736 return dest;
11739 /* Generate DImode references to avoid %fs:(%reg32)
11740 problems and the linker IE->LE relaxation bug.  */
11741 tp_mode = DImode;
11742 pic = NULL;
11743 type = UNSPEC_GOTNTPOFF;
11745 else if (flag_pic)
11747 pic = pic_offset_table_rtx;
11748 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11750 else if (!TARGET_ANY_GNU_TLS)
11752 pic = gen_reg_rtx (Pmode);
11753 emit_insn (gen_set_got (pic));
11754 type = UNSPEC_GOTTPOFF;
11756 else
11758 pic = NULL;
11759 type = UNSPEC_INDNTPOFF;
11762 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
11763 off = gen_rtx_CONST (tp_mode, off);
11764 if (pic)
11765 off = gen_rtx_PLUS (tp_mode, pic, off);
11766 off = gen_const_mem (tp_mode, off);
11767 set_mem_alias_set (off, ix86_GOT_alias_set ());
11769 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11771 base = get_thread_pointer (tp_mode,
11772 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11773 off = force_reg (tp_mode, off);
11774 dest = gen_rtx_PLUS (tp_mode, base, off);
11775 if (tp_mode != Pmode)
11776 dest = convert_to_mode (Pmode, dest, 1);
11778 else
11780 base = get_thread_pointer (Pmode, true);
11781 dest = gen_reg_rtx (Pmode);
11782 emit_insn (gen_sub3_insn (dest, base, off));
11784 break;
11786 case TLS_MODEL_LOCAL_EXEC:
11787 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11788 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11789 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11790 off = gen_rtx_CONST (Pmode, off);
11792 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11794 base = get_thread_pointer (Pmode,
11795 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11796 return gen_rtx_PLUS (Pmode, base, off);
11798 else
11800 base = get_thread_pointer (Pmode, true);
11801 dest = gen_reg_rtx (Pmode);
11802 emit_insn (gen_sub3_insn (dest, base, off));
11804 break;
11806 default:
11807 gcc_unreachable ();
11810 return dest;
11813 /* Return true if the TLS address requires insn using integer registers.
11814 It's used to prevent KMOV/VMOV in TLS code sequences which require integer
11815 MOV instructions, refer to PR103275. */
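/* Illustrative example: an address containing x@gottpoff
   (UNSPEC_GOTNTPOFF), or x@tpoff (UNSPEC_TPOFF) on 32-bit, is flagged
   here because materializing it requires general-purpose moves rather
   than mask or vector moves.  */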
11816 bool
11817 ix86_gpr_tls_address_pattern_p (rtx mem)
11819 gcc_assert (MEM_P (mem));
11821 rtx addr = XEXP (mem, 0);
11822 subrtx_var_iterator::array_type array;
11823 FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL)
11825 rtx op = *iter;
11826 if (GET_CODE (op) == UNSPEC)
11827 switch (XINT (op, 1))
11829 case UNSPEC_GOTNTPOFF:
11830 return true;
11831 case UNSPEC_TPOFF:
11832 if (!TARGET_64BIT)
11833 return true;
11834 break;
11835 default:
11836 break;
11840 return false;
11843 /* Return true if OP refers to a TLS address. */
11844 bool
11845 ix86_tls_address_pattern_p (rtx op)
11847 subrtx_var_iterator::array_type array;
11848 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
11850 rtx op = *iter;
11851 if (MEM_P (op))
11853 rtx *x = &XEXP (op, 0);
11854 while (GET_CODE (*x) == PLUS)
11856 int i;
11857 for (i = 0; i < 2; i++)
11859 rtx u = XEXP (*x, i);
11860 if (GET_CODE (u) == ZERO_EXTEND)
11861 u = XEXP (u, 0);
11862 if (GET_CODE (u) == UNSPEC
11863 && XINT (u, 1) == UNSPEC_TP)
11864 return true;
11866 x = &XEXP (*x, 0);
11869 iter.skip_subrtxes ();
11873 return false;
11876 /* Rewrite *LOC so that it refers to a default TLS address space. */
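/* Illustrative example: a MEM whose address is
   (plus (unspec [const_int 0] UNSPEC_TP) (reg R)) is rewritten so the
   thread-pointer term is dropped from the address and the MEM instead
   carries DEFAULT_TLS_SEG_REG as its address space, i.e. it becomes an
   %fs:/%gs:-relative access of just (reg R).  */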
11877 void
11878 ix86_rewrite_tls_address_1 (rtx *loc)
11880 subrtx_ptr_iterator::array_type array;
11881 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
11883 rtx *loc = *iter;
11884 if (MEM_P (*loc))
11886 rtx addr = XEXP (*loc, 0);
11887 rtx *x = &addr;
11888 while (GET_CODE (*x) == PLUS)
11890 int i;
11891 for (i = 0; i < 2; i++)
11893 rtx u = XEXP (*x, i);
11894 if (GET_CODE (u) == ZERO_EXTEND)
11895 u = XEXP (u, 0);
11896 if (GET_CODE (u) == UNSPEC
11897 && XINT (u, 1) == UNSPEC_TP)
11899 addr_space_t as = DEFAULT_TLS_SEG_REG;
11901 *x = XEXP (*x, 1 - i);
11903 *loc = replace_equiv_address_nv (*loc, addr, true);
11904 set_mem_addr_space (*loc, as);
11905 return;
11908 x = &XEXP (*x, 0);
11911 iter.skip_subrtxes ();
11916 /* Rewrite instruction pattern involving TLS address
11917 so that it refers to a default TLS address space. */
11919 ix86_rewrite_tls_address (rtx pattern)
11921 pattern = copy_insn (pattern);
11922 ix86_rewrite_tls_address_1 (&pattern);
11923 return pattern;
11926 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11927 to symbol DECL if BEIMPORT is true. Otherwise create or return the
11928 unique refptr-DECL symbol corresponding to symbol DECL. */
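/* Illustrative example of the mangling done below: a dllimported symbol
   "foo" gets the stub name "*__imp_foo" (or "*__imp__foo" when a user
   label prefix is in use), and references become loads through that
   pointer; the refptr variant uses the "*.refptr." / "*refptr."
   prefixes instead.  */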
11930 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
11932 static inline hashval_t hash (tree_map *m) { return m->hash; }
11933 static inline bool
11934 equal (tree_map *a, tree_map *b)
11936 return a->base.from == b->base.from;
11939 static int
11940 keep_cache_entry (tree_map *&m)
11942 return ggc_marked_p (m->base.from);
11946 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
11948 static tree
11949 get_dllimport_decl (tree decl, bool beimport)
11951 struct tree_map *h, in;
11952 const char *name;
11953 const char *prefix;
11954 size_t namelen, prefixlen;
11955 char *imp_name;
11956 tree to;
11957 rtx rtl;
11959 if (!dllimport_map)
11960 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
11962 in.hash = htab_hash_pointer (decl);
11963 in.base.from = decl;
11964 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
11965 h = *loc;
11966 if (h)
11967 return h->to;
11969 *loc = h = ggc_alloc<tree_map> ();
11970 h->hash = in.hash;
11971 h->base.from = decl;
11972 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11973 VAR_DECL, NULL, ptr_type_node);
11974 DECL_ARTIFICIAL (to) = 1;
11975 DECL_IGNORED_P (to) = 1;
11976 DECL_EXTERNAL (to) = 1;
11977 TREE_READONLY (to) = 1;
11979 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11980 name = targetm.strip_name_encoding (name);
11981 if (beimport)
11982 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11983 ? "*__imp_" : "*__imp__";
11984 else
11985 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
11986 namelen = strlen (name);
11987 prefixlen = strlen (prefix);
11988 imp_name = (char *) alloca (namelen + prefixlen + 1);
11989 memcpy (imp_name, prefix, prefixlen);
11990 memcpy (imp_name + prefixlen, name, namelen + 1);
11992 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11993 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11994 SET_SYMBOL_REF_DECL (rtl, to);
11995 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
11996 if (!beimport)
11998 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
11999 #ifdef SUB_TARGET_RECORD_STUB
12000 SUB_TARGET_RECORD_STUB (name);
12001 #endif
12004 rtl = gen_const_mem (Pmode, rtl);
12005 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12007 SET_DECL_RTL (to, rtl);
12008 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12010 return to;
12013 /* Expand SYMBOL into its corresponding far-address symbol.
12014 WANT_REG is true if we require the result be a register. */
12016 static rtx
12017 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
12019 tree imp_decl;
12020 rtx x;
12022 gcc_assert (SYMBOL_REF_DECL (symbol));
12023 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
12025 x = DECL_RTL (imp_decl);
12026 if (want_reg)
12027 x = force_reg (Pmode, x);
12028 return x;
12031 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12032 true if we require the result be a register. */
12034 static rtx
12035 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12037 tree imp_decl;
12038 rtx x;
12040 gcc_assert (SYMBOL_REF_DECL (symbol));
12041 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
12043 x = DECL_RTL (imp_decl);
12044 if (want_reg)
12045 x = force_reg (Pmode, x);
12046 return x;
12049 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
12050 is true if we require the result be a register. */
12053 legitimize_pe_coff_symbol (rtx addr, bool inreg)
12055 if (!TARGET_PECOFF)
12056 return NULL_RTX;
12058 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12060 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12061 return legitimize_dllimport_symbol (addr, inreg);
12062 if (GET_CODE (addr) == CONST
12063 && GET_CODE (XEXP (addr, 0)) == PLUS
12064 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12065 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12067 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
12068 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12072 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
12073 return NULL_RTX;
12074 if (GET_CODE (addr) == SYMBOL_REF
12075 && !is_imported_p (addr)
12076 && SYMBOL_REF_EXTERNAL_P (addr)
12077 && SYMBOL_REF_DECL (addr))
12078 return legitimize_pe_coff_extern_decl (addr, inreg);
12080 if (GET_CODE (addr) == CONST
12081 && GET_CODE (XEXP (addr, 0)) == PLUS
12082 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12083 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
12084 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
12085 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
12087 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
12088 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12090 return NULL_RTX;
12093 /* Try machine-dependent ways of modifying an illegitimate address
12094 to be legitimate. If we find one, return the new, valid address.
12095 This macro is used in only one place: `memory_address' in explow.cc.
12097 OLDX is the address as it was before break_out_memory_refs was called.
12098 In some cases it is useful to look at this to decide what needs to be done.
12100 It is always safe for this macro to do nothing. It exists to recognize
12101 opportunities to optimize the output.
12103 For the 80386, we handle X+REG by loading X into a register R and
12104 using R+REG. R will go in a general reg and indexing will be used.
12105 However, if REG is a broken-out memory address or multiplication,
12106 nothing needs to be done because REG can certainly go in a general reg.
12108 When -fpic is used, special handling is needed for symbolic references.
12109 See comments by legitimize_pic_address in i386.cc for details. */
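/* Illustrative example (a sketch, not exhaustive): an address such as
   (plus (ashift (reg A) (const_int 2)) (reg B)) is canonicalized below
   into (plus (mult (reg A) (const_int 4)) (reg B)), matching the
   base + index*scale form of the hardware addressing modes.  */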
12111 static rtx
12112 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
12114 bool changed = false;
12115 unsigned log;
12117 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12118 if (log)
12119 return legitimize_tls_address (x, (enum tls_model) log, false);
12120 if (GET_CODE (x) == CONST
12121 && GET_CODE (XEXP (x, 0)) == PLUS
12122 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12123 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12125 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12126 (enum tls_model) log, false);
12127 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12130 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12132 rtx tmp = legitimize_pe_coff_symbol (x, true);
12133 if (tmp)
12134 return tmp;
12137 if (flag_pic && SYMBOLIC_CONST (x))
12138 return legitimize_pic_address (x, 0);
12140 #if TARGET_MACHO
12141 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12142 return machopic_indirect_data_reference (x, 0);
12143 #endif
12145 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
12146 if (GET_CODE (x) == ASHIFT
12147 && CONST_INT_P (XEXP (x, 1))
12148 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12150 changed = true;
12151 log = INTVAL (XEXP (x, 1));
12152 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12153 GEN_INT (1 << log));
12156 if (GET_CODE (x) == PLUS)
12158 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12160 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12161 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12162 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12164 changed = true;
12165 log = INTVAL (XEXP (XEXP (x, 0), 1));
12166 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12167 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12168 GEN_INT (1 << log));
12171 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12172 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12173 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12175 changed = true;
12176 log = INTVAL (XEXP (XEXP (x, 1), 1));
12177 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12178 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12179 GEN_INT (1 << log));
12182 /* Put multiply first if it isn't already. */
12183 if (GET_CODE (XEXP (x, 1)) == MULT)
12185 std::swap (XEXP (x, 0), XEXP (x, 1));
12186 changed = true;
12189 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12190 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12191 created by virtual register instantiation, register elimination, and
12192 similar optimizations. */
12193 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12195 changed = true;
12196 x = gen_rtx_PLUS (Pmode,
12197 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12198 XEXP (XEXP (x, 1), 0)),
12199 XEXP (XEXP (x, 1), 1));
12202 /* Canonicalize
12203 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12204 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12205 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12206 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12207 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12208 && CONSTANT_P (XEXP (x, 1)))
12210 rtx constant;
12211 rtx other = NULL_RTX;
12213 if (CONST_INT_P (XEXP (x, 1)))
12215 constant = XEXP (x, 1);
12216 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12218 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12220 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12221 other = XEXP (x, 1);
12223 else
12224 constant = 0;
12226 if (constant)
12228 changed = true;
12229 x = gen_rtx_PLUS (Pmode,
12230 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12231 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12232 plus_constant (Pmode, other,
12233 INTVAL (constant)));
12237 if (changed && ix86_legitimate_address_p (mode, x, false))
12238 return x;
12240 if (GET_CODE (XEXP (x, 0)) == MULT)
12242 changed = true;
12243 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
12246 if (GET_CODE (XEXP (x, 1)) == MULT)
12248 changed = true;
12249 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
12252 if (changed
12253 && REG_P (XEXP (x, 1))
12254 && REG_P (XEXP (x, 0)))
12255 return x;
12257 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12259 changed = true;
12260 x = legitimize_pic_address (x, 0);
12263 if (changed && ix86_legitimate_address_p (mode, x, false))
12264 return x;
12266 if (REG_P (XEXP (x, 0)))
12268 rtx temp = gen_reg_rtx (Pmode);
12269 rtx val = force_operand (XEXP (x, 1), temp);
12270 if (val != temp)
12272 val = convert_to_mode (Pmode, val, 1);
12273 emit_move_insn (temp, val);
12276 XEXP (x, 1) = temp;
12277 return x;
12280 else if (REG_P (XEXP (x, 1)))
12282 rtx temp = gen_reg_rtx (Pmode);
12283 rtx val = force_operand (XEXP (x, 0), temp);
12284 if (val != temp)
12286 val = convert_to_mode (Pmode, val, 1);
12287 emit_move_insn (temp, val);
12290 XEXP (x, 0) = temp;
12291 return x;
12295 return x;
12298 /* Print an integer constant expression in assembler syntax. Addition
12299 and subtraction are the only arithmetic that may appear in these
12300 expressions. FILE is the stdio stream to write to, X is the rtx, and
12301 CODE is the operand print code from the output string. */
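/* Illustrative example: (const (unspec [symbol_ref "foo"] UNSPEC_GOTOFF))
   prints as "foo@GOTOFF", UNSPEC_GOTPCREL prints as
   "foo@GOTPCREL(%rip)" in AT&T syntax, and a plain SYMBOL_REF may get
   an "@PLT" suffix when the 'P' code is used.  */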
12303 static void
12304 output_pic_addr_const (FILE *file, rtx x, int code)
12306 char buf[256];
12308 switch (GET_CODE (x))
12310 case PC:
12311 gcc_assert (flag_pic);
12312 putc ('.', file);
12313 break;
12315 case SYMBOL_REF:
12316 if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
12317 output_addr_const (file, x);
12318 else
12320 const char *name = XSTR (x, 0);
12322 /* Mark the decl as referenced so that cgraph will
12323 output the function. */
12324 if (SYMBOL_REF_DECL (x))
12325 mark_decl_referenced (SYMBOL_REF_DECL (x));
12327 #if TARGET_MACHO
12328 if (MACHOPIC_INDIRECT
12329 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12330 name = machopic_indirection_name (x, /*stub_p=*/true);
12331 #endif
12332 assemble_name (file, name);
12334 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
12335 && code == 'P' && ix86_call_use_plt_p (x))
12336 fputs ("@PLT", file);
12337 break;
12339 case LABEL_REF:
12340 x = XEXP (x, 0);
12341 /* FALLTHRU */
12342 case CODE_LABEL:
12343 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12344 assemble_name (asm_out_file, buf);
12345 break;
12347 case CONST_INT:
12348 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12349 break;
12351 case CONST:
12352 /* This used to output parentheses around the expression,
12353 but that does not work on the 386 (either ATT or BSD assembler). */
12354 output_pic_addr_const (file, XEXP (x, 0), code);
12355 break;
12357 case CONST_DOUBLE:
12358 /* We can't handle floating point constants;
12359 TARGET_PRINT_OPERAND must handle them. */
12360 output_operand_lossage ("floating constant misused");
12361 break;
12363 case PLUS:
12364 /* Some assemblers need integer constants to appear first. */
12365 if (CONST_INT_P (XEXP (x, 0)))
12367 output_pic_addr_const (file, XEXP (x, 0), code);
12368 putc ('+', file);
12369 output_pic_addr_const (file, XEXP (x, 1), code);
12371 else
12373 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12374 output_pic_addr_const (file, XEXP (x, 1), code);
12375 putc ('+', file);
12376 output_pic_addr_const (file, XEXP (x, 0), code);
12378 break;
12380 case MINUS:
12381 if (!TARGET_MACHO)
12382 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12383 output_pic_addr_const (file, XEXP (x, 0), code);
12384 putc ('-', file);
12385 output_pic_addr_const (file, XEXP (x, 1), code);
12386 if (!TARGET_MACHO)
12387 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12388 break;
12390 case UNSPEC:
12391 gcc_assert (XVECLEN (x, 0) == 1);
12392 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12393 switch (XINT (x, 1))
12395 case UNSPEC_GOT:
12396 fputs ("@GOT", file);
12397 break;
12398 case UNSPEC_GOTOFF:
12399 fputs ("@GOTOFF", file);
12400 break;
12401 case UNSPEC_PLTOFF:
12402 fputs ("@PLTOFF", file);
12403 break;
12404 case UNSPEC_PCREL:
12405 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12406 "(%rip)" : "[rip]", file);
12407 break;
12408 case UNSPEC_GOTPCREL:
12409 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12410 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12411 break;
12412 case UNSPEC_GOTTPOFF:
12413 /* FIXME: This might be @TPOFF in Sun ld too. */
12414 fputs ("@gottpoff", file);
12415 break;
12416 case UNSPEC_TPOFF:
12417 fputs ("@tpoff", file);
12418 break;
12419 case UNSPEC_NTPOFF:
12420 if (TARGET_64BIT)
12421 fputs ("@tpoff", file);
12422 else
12423 fputs ("@ntpoff", file);
12424 break;
12425 case UNSPEC_DTPOFF:
12426 fputs ("@dtpoff", file);
12427 break;
12428 case UNSPEC_GOTNTPOFF:
12429 if (TARGET_64BIT)
12430 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12431 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12432 else
12433 fputs ("@gotntpoff", file);
12434 break;
12435 case UNSPEC_INDNTPOFF:
12436 fputs ("@indntpoff", file);
12437 break;
12438 #if TARGET_MACHO
12439 case UNSPEC_MACHOPIC_OFFSET:
12440 putc ('-', file);
12441 machopic_output_function_base_name (file);
12442 break;
12443 #endif
12444 default:
12445 output_operand_lossage ("invalid UNSPEC as operand");
12446 break;
12448 break;
12450 default:
12451 output_operand_lossage ("invalid expression as operand");
12455 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12456 We need to emit DTP-relative relocations. */
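/* Illustrative example of the output below for a symbol "x", assuming
   ASM_LONG is the usual .long directive: ".long x@dtpoff" for SIZE 4,
   and ".long x@dtpoff, 0" for SIZE 8 (the upper half is padded with
   zero).  */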
12458 static void ATTRIBUTE_UNUSED
12459 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12461 fputs (ASM_LONG, file);
12462 output_addr_const (file, x);
12463 fputs ("@dtpoff", file);
12464 switch (size)
12466 case 4:
12467 break;
12468 case 8:
12469 fputs (", 0", file);
12470 break;
12471 default:
12472 gcc_unreachable ();
12476 /* Return true if X is a representation of the PIC register. This copes
12477 with calls from ix86_find_base_term, where the register might have
12478 been replaced by a cselib value. */
12480 static bool
12481 ix86_pic_register_p (rtx x)
12483 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12484 return (pic_offset_table_rtx
12485 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12486 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
12487 return true;
12488 else if (!REG_P (x))
12489 return false;
12490 else if (pic_offset_table_rtx)
12492 if (REGNO (x) == REGNO (pic_offset_table_rtx))
12493 return true;
12494 if (HARD_REGISTER_P (x)
12495 && !HARD_REGISTER_P (pic_offset_table_rtx)
12496 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
12497 return true;
12498 return false;
12500 else
12501 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12504 /* Helper function for ix86_delegitimize_address.
12505 Attempt to delegitimize TLS local-exec accesses. */
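/* Illustrative example: a local-exec access such as %fs:x@tpoff, i.e. a
   MEM in the TLS segment address space whose displacement wraps
   UNSPEC_NTPOFF around SYMBOL_REF "x", is mapped back to "x", with any
   base, index and constant offset re-applied as ordinary PLUSes.  */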
12507 static rtx
12508 ix86_delegitimize_tls_address (rtx orig_x)
12510 rtx x = orig_x, unspec;
12511 struct ix86_address addr;
12513 if (!TARGET_TLS_DIRECT_SEG_REFS)
12514 return orig_x;
12515 if (MEM_P (x))
12516 x = XEXP (x, 0);
12517 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12518 return orig_x;
12519 if (ix86_decompose_address (x, &addr) == 0
12520 || addr.seg != DEFAULT_TLS_SEG_REG
12521 || addr.disp == NULL_RTX
12522 || GET_CODE (addr.disp) != CONST)
12523 return orig_x;
12524 unspec = XEXP (addr.disp, 0);
12525 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12526 unspec = XEXP (unspec, 0);
12527 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12528 return orig_x;
12529 x = XVECEXP (unspec, 0, 0);
12530 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12531 if (unspec != XEXP (addr.disp, 0))
12532 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12533 if (addr.index)
12535 rtx idx = addr.index;
12536 if (addr.scale != 1)
12537 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12538 x = gen_rtx_PLUS (Pmode, idx, x);
12540 if (addr.base)
12541 x = gen_rtx_PLUS (Pmode, addr.base, x);
12542 if (MEM_P (orig_x))
12543 x = replace_equiv_address_nv (orig_x, x);
12544 return x;
12547 /* In the name of slightly smaller debug output, and to cater to
12548 general assembler lossage, recognize PIC+GOTOFF and turn it back
12549 into a direct symbol reference.
12551 On Darwin, this is necessary to avoid a crash, because Darwin
12552 has a different PIC label for each routine but the DWARF debugging
12553 information is not associated with any particular routine, so it's
12554 necessary to remove references to the PIC label from RTL stored by
12555 the DWARF output code.
12557 This helper is used in the normal ix86_delegitimize_address
12558 entrypoint (e.g. used in the target delegitimization hook) and
12559 in ix86_find_base_term. As compile time memory optimization, we
12560 avoid allocating rtxes that will not change anything on the outcome
12561 of the callers (find_base_value and find_base_term). */
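/* Illustrative example: for 32-bit PIC an address of the form
   (plus (reg pic) (const (unspec [symbol_ref "foo"] UNSPEC_GOTOFF)))
   delegitimizes back to "foo" (plus any register or constant addend),
   and on 64-bit a (const (unspec [foo] UNSPEC_GOTPCREL)) GOT load
   delegitimizes to "foo" itself.  */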
12563 static inline rtx
12564 ix86_delegitimize_address_1 (rtx x, bool base_term_p)
12566 rtx orig_x = delegitimize_mem_from_attrs (x);
12567 /* addend is NULL or some rtx if x is something+GOTOFF where
12568 something doesn't include the PIC register. */
12569 rtx addend = NULL_RTX;
12570 /* reg_addend is NULL or a multiple of some register. */
12571 rtx reg_addend = NULL_RTX;
12572 /* const_addend is NULL or a const_int. */
12573 rtx const_addend = NULL_RTX;
12574 /* This is the result, or NULL. */
12575 rtx result = NULL_RTX;
12577 x = orig_x;
12579 if (MEM_P (x))
12580 x = XEXP (x, 0);
12582 if (TARGET_64BIT)
12584 if (GET_CODE (x) == CONST
12585 && GET_CODE (XEXP (x, 0)) == PLUS
12586 && GET_MODE (XEXP (x, 0)) == Pmode
12587 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12588 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
12589 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
12591 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
12592 base. A CONST can't be arg_pointer_rtx based. */
12593 if (base_term_p && MEM_P (orig_x))
12594 return orig_x;
12595 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
12596 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
12597 if (MEM_P (orig_x))
12598 x = replace_equiv_address_nv (orig_x, x);
12599 return x;
12602 if (GET_CODE (x) == CONST
12603 && GET_CODE (XEXP (x, 0)) == UNSPEC
12604 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
12605 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
12606 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
12608 x = XVECEXP (XEXP (x, 0), 0, 0);
12609 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
12611 x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
12612 if (x == NULL_RTX)
12613 return orig_x;
12615 return x;
12618 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
12619 return ix86_delegitimize_tls_address (orig_x);
12621 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
12622 and -mcmodel=medium -fpic. */
12625 if (GET_CODE (x) != PLUS
12626 || GET_CODE (XEXP (x, 1)) != CONST)
12627 return ix86_delegitimize_tls_address (orig_x);
12629 if (ix86_pic_register_p (XEXP (x, 0)))
12630 /* %ebx + GOT/GOTOFF */
12632 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12634 /* %ebx + %reg * scale + GOT/GOTOFF */
12635 reg_addend = XEXP (x, 0);
12636 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12637 reg_addend = XEXP (reg_addend, 1);
12638 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12639 reg_addend = XEXP (reg_addend, 0);
12640 else
12642 reg_addend = NULL_RTX;
12643 addend = XEXP (x, 0);
12646 else
12647 addend = XEXP (x, 0);
12649 x = XEXP (XEXP (x, 1), 0);
12650 if (GET_CODE (x) == PLUS
12651 && CONST_INT_P (XEXP (x, 1)))
12653 const_addend = XEXP (x, 1);
12654 x = XEXP (x, 0);
12657 if (GET_CODE (x) == UNSPEC
12658 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12659 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
12660 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
12661 && !MEM_P (orig_x) && !addend)))
12662 result = XVECEXP (x, 0, 0);
12664 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
12665 && !MEM_P (orig_x))
12666 result = XVECEXP (x, 0, 0);
12668 if (! result)
12669 return ix86_delegitimize_tls_address (orig_x);
12671 /* For (PLUS something CONST_INT) both find_base_{value,term} just
12672 recurse on the first operand. */
12673 if (const_addend && !base_term_p)
12674 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12675 if (reg_addend)
12676 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12677 if (addend)
12679 /* If the rest of original X doesn't involve the PIC register, add
12680 addend and subtract pic_offset_table_rtx. This can happen e.g.
12681 for code like:
12682 leal (%ebx, %ecx, 4), %ecx
12684 movl foo@GOTOFF(%ecx), %edx
12685 in which case we return (%ecx - %ebx) + foo
12686 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
12687 and reload has completed. Don't do the latter for debug,
12688 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
12689 if (pic_offset_table_rtx
12690 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
12691 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12692 pic_offset_table_rtx),
12693 result);
12694 else if (base_term_p
12695 && pic_offset_table_rtx
12696 && !TARGET_MACHO
12697 && !TARGET_VXWORKS_RTP)
12699 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
12700 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
12701 result = gen_rtx_PLUS (Pmode, tmp, result);
12703 else
12704 return orig_x;
12706 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12708 result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
12709 if (result == NULL_RTX)
12710 return orig_x;
12712 return result;
12715 /* The normal entry point for the helper above.  */
12717 static rtx
12718 ix86_delegitimize_address (rtx x)
12720 return ix86_delegitimize_address_1 (x, false);
12723 /* If X is a machine specific address (i.e. a symbol or label being
12724 referenced as a displacement from the GOT implemented using an
12725 UNSPEC), then return the base term. Otherwise return X. */
12728 ix86_find_base_term (rtx x)
12730 rtx term;
12732 if (TARGET_64BIT)
12734 if (GET_CODE (x) != CONST)
12735 return x;
12736 term = XEXP (x, 0);
12737 if (GET_CODE (term) == PLUS
12738 && CONST_INT_P (XEXP (term, 1)))
12739 term = XEXP (term, 0);
12740 if (GET_CODE (term) != UNSPEC
12741 || (XINT (term, 1) != UNSPEC_GOTPCREL
12742 && XINT (term, 1) != UNSPEC_PCREL))
12743 return x;
12745 return XVECEXP (term, 0, 0);
12748 return ix86_delegitimize_address_1 (x, true);
12751 /* Return true if X shouldn't be emitted into the debug info.
12752 Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
12753 symbol easily into the .debug_info section, so we need not
12754 delegitimize it, but can instead assemble it as @gotoff.
12755 Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
12756 assembles that as a _GLOBAL_OFFSET_TABLE_-. expression.  */
12758 static bool
12759 ix86_const_not_ok_for_debug_p (rtx x)
12761 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
12762 return true;
12764 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
12765 return true;
12767 return false;
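/* Print to FILE the condition suffix for CODE in MODE (for example EQ
   becomes "e", LTU becomes "b" and GEU becomes "nb" in CCmode), so
   callers can compose mnemonics such as "sete", "cmovne" or "ja".
   REVERSE inverts the condition; FP selects the spellings needed for
   fcmov-style users.  See the switch below for the exact per-mode
   mapping.  */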
12770 static void
12771 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
12772 bool fp, FILE *file)
12774 const char *suffix;
12776 if (mode == CCFPmode)
12778 code = ix86_fp_compare_code_to_integer (code);
12779 mode = CCmode;
12781 if (reverse)
12782 code = reverse_condition (code);
12784 switch (code)
12786 case EQ:
12787 gcc_assert (mode != CCGZmode);
12788 switch (mode)
12790 case E_CCAmode:
12791 suffix = "a";
12792 break;
12793 case E_CCCmode:
12794 suffix = "c";
12795 break;
12796 case E_CCOmode:
12797 suffix = "o";
12798 break;
12799 case E_CCPmode:
12800 suffix = "p";
12801 break;
12802 case E_CCSmode:
12803 suffix = "s";
12804 break;
12805 default:
12806 suffix = "e";
12807 break;
12809 break;
12810 case NE:
12811 gcc_assert (mode != CCGZmode);
12812 switch (mode)
12814 case E_CCAmode:
12815 suffix = "na";
12816 break;
12817 case E_CCCmode:
12818 suffix = "nc";
12819 break;
12820 case E_CCOmode:
12821 suffix = "no";
12822 break;
12823 case E_CCPmode:
12824 suffix = "np";
12825 break;
12826 case E_CCSmode:
12827 suffix = "ns";
12828 break;
12829 default:
12830 suffix = "ne";
12831 break;
12833 break;
12834 case GT:
12835 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12836 suffix = "g";
12837 break;
12838 case GTU:
12839 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12840 Those same assemblers have the same but opposite lossage on cmov. */
12841 if (mode == CCmode)
12842 suffix = fp ? "nbe" : "a";
12843 else
12844 gcc_unreachable ();
12845 break;
12846 case LT:
12847 switch (mode)
12849 case E_CCNOmode:
12850 case E_CCGOCmode:
12851 suffix = "s";
12852 break;
12854 case E_CCmode:
12855 case E_CCGCmode:
12856 case E_CCGZmode:
12857 suffix = "l";
12858 break;
12860 default:
12861 gcc_unreachable ();
12863 break;
12864 case LTU:
12865 if (mode == CCmode || mode == CCGZmode)
12866 suffix = "b";
12867 else if (mode == CCCmode)
12868 suffix = fp ? "b" : "c";
12869 else
12870 gcc_unreachable ();
12871 break;
12872 case GE:
12873 switch (mode)
12875 case E_CCNOmode:
12876 case E_CCGOCmode:
12877 suffix = "ns";
12878 break;
12880 case E_CCmode:
12881 case E_CCGCmode:
12882 case E_CCGZmode:
12883 suffix = "ge";
12884 break;
12886 default:
12887 gcc_unreachable ();
12889 break;
12890 case GEU:
12891 if (mode == CCmode || mode == CCGZmode)
12892 suffix = "nb";
12893 else if (mode == CCCmode)
12894 suffix = fp ? "nb" : "nc";
12895 else
12896 gcc_unreachable ();
12897 break;
12898 case LE:
12899 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12900 suffix = "le";
12901 break;
12902 case LEU:
12903 if (mode == CCmode)
12904 suffix = "be";
12905 else
12906 gcc_unreachable ();
12907 break;
12908 case UNORDERED:
12909 suffix = fp ? "u" : "p";
12910 break;
12911 case ORDERED:
12912 suffix = fp ? "nu" : "np";
12913 break;
12914 default:
12915 gcc_unreachable ();
12917 fputs (suffix, file);
12920 /* Print the name of register X to FILE based on its machine mode and number.
12921 If CODE is 'w', pretend the mode is HImode.
12922 If CODE is 'b', pretend the mode is QImode.
12923 If CODE is 'k', pretend the mode is SImode.
12924 If CODE is 'q', pretend the mode is DImode.
12925 If CODE is 'x', pretend the mode is V4SFmode.
12926 If CODE is 't', pretend the mode is V8SFmode.
12927 If CODE is 'g', pretend the mode is V16SFmode.
12928 If CODE is 'h', pretend the reg is the 'high' byte register.
12929 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12930 If CODE is 'd', duplicate the operand for AVX instruction.
12931 If CODE is 'V', print naked full integer register name without %.  */
12934 void
12935 print_reg (rtx x, int code, FILE *file)
12937 const char *reg;
12938 int msize;
12939 unsigned int regno;
12940 bool duplicated;
12942 if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
12943 putc ('%', file);
12945 if (x == pc_rtx)
12947 gcc_assert (TARGET_64BIT);
12948 fputs ("rip", file);
12949 return;
12952 if (code == 'y' && STACK_TOP_P (x))
12954 fputs ("st(0)", file);
12955 return;
12958 if (code == 'w')
12959 msize = 2;
12960 else if (code == 'b')
12961 msize = 1;
12962 else if (code == 'k')
12963 msize = 4;
12964 else if (code == 'q')
12965 msize = 8;
12966 else if (code == 'h')
12967 msize = 0;
12968 else if (code == 'x')
12969 msize = 16;
12970 else if (code == 't')
12971 msize = 32;
12972 else if (code == 'g')
12973 msize = 64;
12974 else
12975 msize = GET_MODE_SIZE (GET_MODE (x));
12977 regno = REGNO (x);
12979 if (regno == ARG_POINTER_REGNUM
12980 || regno == FRAME_POINTER_REGNUM
12981 || regno == FPSR_REG)
12983 output_operand_lossage
12984 ("invalid use of register '%s'", reg_names[regno]);
12985 return;
12987 else if (regno == FLAGS_REG)
12989 output_operand_lossage ("invalid use of asm flag output");
12990 return;
12993 if (code == 'V')
12995 if (GENERAL_REGNO_P (regno))
12996 msize = GET_MODE_SIZE (word_mode);
12997 else
12998 error ("%<V%> modifier on non-integer register");
13001 duplicated = code == 'd' && TARGET_AVX;
13003 switch (msize)
13005 case 16:
13006 case 12:
13007 case 8:
13008 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
13009 warning (0, "unsupported size for integer register");
13010 /* FALLTHRU */
13011 case 4:
13012 if (LEGACY_INT_REGNO_P (regno))
13013 putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
13014 /* FALLTHRU */
13015 case 2:
13016 normal:
13017 reg = hi_reg_name[regno];
13018 break;
13019 case 1:
13020 if (regno >= ARRAY_SIZE (qi_reg_name))
13021 goto normal;
13022 if (!ANY_QI_REGNO_P (regno))
13023 error ("unsupported size for integer register");
13024 reg = qi_reg_name[regno];
13025 break;
13026 case 0:
13027 if (regno >= ARRAY_SIZE (qi_high_reg_name))
13028 goto normal;
13029 reg = qi_high_reg_name[regno];
13030 break;
13031 case 32:
13032 case 64:
13033 if (SSE_REGNO_P (regno))
13035 gcc_assert (!duplicated);
13036 putc (msize == 32 ? 'y' : 'z', file);
13037 reg = hi_reg_name[regno] + 1;
13038 break;
13040 goto normal;
13041 default:
13042 gcc_unreachable ();
13045 fputs (reg, file);
13047 /* Irritatingly, AMD extended registers use
13048 a different naming convention: "r%d[bwd]".  */
13049 if (REX_INT_REGNO_P (regno))
13051 gcc_assert (TARGET_64BIT);
13052 switch (msize)
13054 case 0:
13055 error ("extended registers have no high halves");
13056 break;
13057 case 1:
13058 putc ('b', file);
13059 break;
13060 case 2:
13061 putc ('w', file);
13062 break;
13063 case 4:
13064 putc ('d', file);
13065 break;
13066 case 8:
13067 /* no suffix */
13068 break;
13069 default:
13070 error ("unsupported operand size for extended register");
13071 break;
13073 return;
13076 if (duplicated)
13078 if (ASSEMBLER_DIALECT == ASM_ATT)
13079 fprintf (file, ", %%%s", reg);
13080 else
13081 fprintf (file, ", %s", reg);
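/* Illustrative examples for print_reg: with hard register %rax, code 'b'
   prints "al", 'w' prints "ax", 'k' prints "eax", 'q' prints "rax" and
   'h' prints "ah" (each with a leading '%' in AT&T syntax); for an SSE
   register, 't' and 'g' switch the printed name to its ymm / zmm
   form.  */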
13085 /* Meaning of CODE:
13086 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13087 C -- print opcode suffix for set/cmov insn.
13088 c -- like C, but print reversed condition
13089 F,f -- likewise, but for floating-point.
13090 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13091 otherwise nothing
13092 R -- print embedded rounding and sae.
13093 r -- print only sae.
13094 z -- print the opcode suffix for the size of the current operand.
13095 Z -- likewise, with special suffixes for x87 instructions.
13096 * -- print a star (in certain assembler syntax)
13097 A -- print an absolute memory reference.
13098 E -- print address with DImode register names if TARGET_64BIT.
13099 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13100 s -- print a shift double count, followed by the assembler's argument
13101 delimiter.
13102 b -- print the QImode name of the register for the indicated operand.
13103 %b0 would print %al if operands[0] is reg 0.
13104 w -- likewise, print the HImode name of the register.
13105 k -- likewise, print the SImode name of the register.
13106 q -- likewise, print the DImode name of the register.
13107 x -- likewise, print the V4SFmode name of the register.
13108 t -- likewise, print the V8SFmode name of the register.
13109 g -- likewise, print the V16SFmode name of the register.
13110 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13111 y -- print "st(0)" instead of "st" as a register.
13112 d -- print duplicated register operand for AVX instruction.
13113 D -- print condition for SSE cmp instruction.
13114 P -- if PIC, print an @PLT suffix. For -fno-plt, load function
13115 address from GOT.
13116 p -- print raw symbol name.
13117 X -- don't print any sort of PIC '@' suffix for a symbol.
13118 & -- print some in-use local-dynamic symbol name.
13119 H -- print a memory address offset by 8; used for sse high-parts
13120 Y -- print condition for XOP pcom* instruction.
13121 V -- print naked full integer register name without %.
13122 + -- print a branch hint as 'cs' or 'ds' prefix
13123 ; -- print a semicolon (after prefixes due to bug in older gas).
13124 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13125 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
13126 M -- print addr32 prefix for TARGET_X32 with VSIB address.
13127 ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
13128 N -- print maskz if it's constant 0 operand.  */
13131 void
13132 ix86_print_operand (FILE *file, rtx x, int code)
13134 if (code)
13136 switch (code)
13138 case 'A':
13139 switch (ASSEMBLER_DIALECT)
13141 case ASM_ATT:
13142 putc ('*', file);
13143 break;
13145 case ASM_INTEL:
13146 /* Intel syntax. For absolute addresses, registers should not
13147 be surrounded by braces. */
13148 if (!REG_P (x))
13150 putc ('[', file);
13151 ix86_print_operand (file, x, 0);
13152 putc (']', file);
13153 return;
13155 break;
13157 default:
13158 gcc_unreachable ();
13161 ix86_print_operand (file, x, 0);
13162 return;
13164 case 'E':
13165 /* Wrap address in an UNSPEC to declare special handling. */
13166 if (TARGET_64BIT)
13167 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
13169 output_address (VOIDmode, x);
13170 return;
13172 case 'L':
13173 if (ASSEMBLER_DIALECT == ASM_ATT)
13174 putc ('l', file);
13175 return;
13177 case 'W':
13178 if (ASSEMBLER_DIALECT == ASM_ATT)
13179 putc ('w', file);
13180 return;
13182 case 'B':
13183 if (ASSEMBLER_DIALECT == ASM_ATT)
13184 putc ('b', file);
13185 return;
13187 case 'Q':
13188 if (ASSEMBLER_DIALECT == ASM_ATT)
13189 putc ('l', file);
13190 return;
13192 case 'S':
13193 if (ASSEMBLER_DIALECT == ASM_ATT)
13194 putc ('s', file);
13195 return;
13197 case 'T':
13198 if (ASSEMBLER_DIALECT == ASM_ATT)
13199 putc ('t', file);
13200 return;
13202 case 'O':
13203 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13204 if (ASSEMBLER_DIALECT != ASM_ATT)
13205 return;
13207 switch (GET_MODE_SIZE (GET_MODE (x)))
13209 case 2:
13210 putc ('w', file);
13211 break;
13213 case 4:
13214 putc ('l', file);
13215 break;
13217 case 8:
13218 putc ('q', file);
13219 break;
13221 default:
13222 output_operand_lossage ("invalid operand size for operand "
13223 "code 'O'");
13224 return;
13227 putc ('.', file);
13228 #endif
13229 return;
13231 case 'z':
13232 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13234 /* Opcodes don't get size suffixes if using Intel syntax.  */
13235 if (ASSEMBLER_DIALECT == ASM_INTEL)
13236 return;
13238 switch (GET_MODE_SIZE (GET_MODE (x)))
13240 case 1:
13241 putc ('b', file);
13242 return;
13244 case 2:
13245 putc ('w', file);
13246 return;
13248 case 4:
13249 putc ('l', file);
13250 return;
13252 case 8:
13253 putc ('q', file);
13254 return;
13256 default:
13257 output_operand_lossage ("invalid operand size for operand "
13258 "code 'z'");
13259 return;
13263 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13265 if (this_is_asm_operands)
13266 warning_for_asm (this_is_asm_operands,
13267 "non-integer operand used with operand code %<z%>");
13268 else
13269 warning (0, "non-integer operand used with operand code %<z%>");
13271 /* FALLTHRU */
13273 case 'Z':
13274 /* 387 opcodes don't get size suffixes if using Intel syntax.  */
13275 if (ASSEMBLER_DIALECT == ASM_INTEL)
13276 return;
13278 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13280 switch (GET_MODE_SIZE (GET_MODE (x)))
13282 case 2:
13283 #ifdef HAVE_AS_IX86_FILDS
13284 putc ('s', file);
13285 #endif
13286 return;
13288 case 4:
13289 putc ('l', file);
13290 return;
13292 case 8:
13293 #ifdef HAVE_AS_IX86_FILDQ
13294 putc ('q', file);
13295 #else
13296 fputs ("ll", file);
13297 #endif
13298 return;
13300 default:
13301 break;
13304 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13306 /* 387 opcodes don't get size suffixes
13307 if the operands are registers. */
13308 if (STACK_REG_P (x))
13309 return;
13311 switch (GET_MODE_SIZE (GET_MODE (x)))
13313 case 4:
13314 putc ('s', file);
13315 return;
13317 case 8:
13318 putc ('l', file);
13319 return;
13321 case 12:
13322 case 16:
13323 putc ('t', file);
13324 return;
13326 default:
13327 break;
13330 else
13332 output_operand_lossage ("invalid operand type used with "
13333 "operand code '%c'", code);
13334 return;
13337 output_operand_lossage ("invalid operand size for operand code '%c'",
13338 code);
13339 return;
13341 case 'd':
13342 case 'b':
13343 case 'w':
13344 case 'k':
13345 case 'q':
13346 case 'h':
13347 case 't':
13348 case 'g':
13349 case 'y':
13350 case 'x':
13351 case 'X':
13352 case 'P':
13353 case 'p':
13354 case 'V':
13355 break;
13357 case 's':
13358 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13360 ix86_print_operand (file, x, 0);
13361 fputs (", ", file);
13363 return;
13365 case 'Y':
13366 switch (GET_CODE (x))
13368 case NE:
13369 fputs ("neq", file);
13370 break;
13371 case EQ:
13372 fputs ("eq", file);
13373 break;
13374 case GE:
13375 case GEU:
13376 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13377 break;
13378 case GT:
13379 case GTU:
13380 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13381 break;
13382 case LE:
13383 case LEU:
13384 fputs ("le", file);
13385 break;
13386 case LT:
13387 case LTU:
13388 fputs ("lt", file);
13389 break;
13390 case UNORDERED:
13391 fputs ("unord", file);
13392 break;
13393 case ORDERED:
13394 fputs ("ord", file);
13395 break;
13396 case UNEQ:
13397 fputs ("ueq", file);
13398 break;
13399 case UNGE:
13400 fputs ("nlt", file);
13401 break;
13402 case UNGT:
13403 fputs ("nle", file);
13404 break;
13405 case UNLE:
13406 fputs ("ule", file);
13407 break;
13408 case UNLT:
13409 fputs ("ult", file);
13410 break;
13411 case LTGT:
13412 fputs ("une", file);
13413 break;
13414 default:
13415 output_operand_lossage ("operand is not a condition code, "
13416 "invalid operand code 'Y'");
13417 return;
13419 return;
13421 case 'D':
13422 /* Little bit of braindamage here.  The SSE compare instructions
13423 use completely different names for the comparisons than the
13424 fp conditional moves do.  */
13425 switch (GET_CODE (x))
13427 case UNEQ:
13428 if (TARGET_AVX)
13430 fputs ("eq_us", file);
13431 break;
13433 /* FALLTHRU */
13434 case EQ:
13435 fputs ("eq", file);
13436 break;
13437 case UNLT:
13438 if (TARGET_AVX)
13440 fputs ("nge", file);
13441 break;
13443 /* FALLTHRU */
13444 case LT:
13445 fputs ("lt", file);
13446 break;
13447 case UNLE:
13448 if (TARGET_AVX)
13450 fputs ("ngt", file);
13451 break;
13453 /* FALLTHRU */
13454 case LE:
13455 fputs ("le", file);
13456 break;
13457 case UNORDERED:
13458 fputs ("unord", file);
13459 break;
13460 case LTGT:
13461 if (TARGET_AVX)
13463 fputs ("neq_oq", file);
13464 break;
13466 /* FALLTHRU */
13467 case NE:
13468 fputs ("neq", file);
13469 break;
13470 case GE:
13471 if (TARGET_AVX)
13473 fputs ("ge", file);
13474 break;
13476 /* FALLTHRU */
13477 case UNGE:
13478 fputs ("nlt", file);
13479 break;
13480 case GT:
13481 if (TARGET_AVX)
13483 fputs ("gt", file);
13484 break;
13486 /* FALLTHRU */
13487 case UNGT:
13488 fputs ("nle", file);
13489 break;
13490 case ORDERED:
13491 fputs ("ord", file);
13492 break;
13493 default:
13494 output_operand_lossage ("operand is not a condition code, "
13495 "invalid operand code 'D'");
13496 return;
13498 return;
13500 case 'F':
13501 case 'f':
13502 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13503 if (ASSEMBLER_DIALECT == ASM_ATT)
13504 putc ('.', file);
13505 gcc_fallthrough ();
13506 #endif
13508 case 'C':
13509 case 'c':
13510 if (!COMPARISON_P (x))
13512 output_operand_lossage ("operand is not a condition code, "
13513 "invalid operand code '%c'", code);
13514 return;
13516 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
13517 code == 'c' || code == 'f',
13518 code == 'F' || code == 'f',
13519 file);
13520 return;
13522 case 'H':
13523 if (!offsettable_memref_p (x))
13525 output_operand_lossage ("operand is not an offsettable memory "
13526 "reference, invalid operand code 'H'");
13527 return;
13529 /* It doesn't actually matter what mode we use here, as we're
13530 only going to use this for printing. */
13531 x = adjust_address_nv (x, DImode, 8);
13532 /* Output 'qword ptr' for intel assembler dialect. */
13533 if (ASSEMBLER_DIALECT == ASM_INTEL)
13534 code = 'q';
13535 break;
13537 case 'K':
13538 if (!CONST_INT_P (x))
13540 output_operand_lossage ("operand is not an integer, invalid "
13541 "operand code 'K'");
13542 return;
13545 if (INTVAL (x) & IX86_HLE_ACQUIRE)
13546 #ifdef HAVE_AS_IX86_HLE
13547 fputs ("xacquire ", file);
13548 #else
13549 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
13550 #endif
13551 else if (INTVAL (x) & IX86_HLE_RELEASE)
13552 #ifdef HAVE_AS_IX86_HLE
13553 fputs ("xrelease ", file);
13554 #else
13555 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
13556 #endif
13557 /* We do not want to print the value of the operand.  */
13558 return;
13560 case 'N':
13561 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
13562 fputs ("{z}", file);
13563 return;
13565 case 'r':
13566 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
13568 output_operand_lossage ("operand is not a specific integer, "
13569 "invalid operand code 'r'");
13570 return;
13573 if (ASSEMBLER_DIALECT == ASM_INTEL)
13574 fputs (", ", file);
13576 fputs ("{sae}", file);
13578 if (ASSEMBLER_DIALECT == ASM_ATT)
13579 fputs (", ", file);
13581 return;
13583 case 'R':
13584 if (!CONST_INT_P (x))
13586 output_operand_lossage ("operand is not an integer, invalid "
13587 "operand code 'R'");
13588 return;
13591 if (ASSEMBLER_DIALECT == ASM_INTEL)
13592 fputs (", ", file);
13594 switch (INTVAL (x))
13596 case ROUND_NEAREST_INT | ROUND_SAE:
13597 fputs ("{rn-sae}", file);
13598 break;
13599 case ROUND_NEG_INF | ROUND_SAE:
13600 fputs ("{rd-sae}", file);
13601 break;
13602 case ROUND_POS_INF | ROUND_SAE:
13603 fputs ("{ru-sae}", file);
13604 break;
13605 case ROUND_ZERO | ROUND_SAE:
13606 fputs ("{rz-sae}", file);
13607 break;
13608 default:
13609 output_operand_lossage ("operand is not a specific integer, "
13610 "invalid operand code 'R'");
13613 if (ASSEMBLER_DIALECT == ASM_ATT)
13614 fputs (", ", file);
13616 return;
13618 case '*':
13619 if (ASSEMBLER_DIALECT == ASM_ATT)
13620 putc ('*', file);
13621 return;
13623 case '&':
13625 const char *name = get_some_local_dynamic_name ();
13626 if (name == NULL)
13627 output_operand_lossage ("'%%&' used without any "
13628 "local dynamic TLS references");
13629 else
13630 assemble_name (file, name);
13631 return;
13634 case '+':
13636 rtx x;
13638 if (!optimize
13639 || optimize_function_for_size_p (cfun)
13640 || !TARGET_BRANCH_PREDICTION_HINTS)
13641 return;
13643 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13644 if (x)
13646 int pred_val = profile_probability::from_reg_br_prob_note
13647 (XINT (x, 0)).to_reg_br_prob_base ();
13649 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13650 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13652 bool taken = pred_val > REG_BR_PROB_BASE / 2;
13653 bool cputaken
13654 = final_forward_branch_p (current_output_insn) == 0;
13656 /* Emit hints only in the case default branch prediction
13657 heuristics would fail. */
13658 if (taken != cputaken)
13660 /* We use 3e (DS) prefix for taken branches and
13661 2e (CS) prefix for not taken branches. */
13662 if (taken)
13663 fputs ("ds ; ", file);
13664 else
13665 fputs ("cs ; ", file);
13669 return;
13672 case ';':
13673 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13674 putc (';', file);
13675 #endif
13676 return;
13678 case '~':
13679 putc (TARGET_AVX2 ? 'i' : 'f', file);
13680 return;
13682 case 'M':
13683 if (TARGET_X32)
13685 /* NB: 32-bit indices in VSIB address are sign-extended
13686 to 64 bits. In x32, if 32-bit address 0xf7fa3010 is
13687 sign-extended to 0xfffffffff7fa3010 which is invalid
13688 address. Add addr32 prefix if there is no base
13689 register nor symbol. */
13690 bool ok;
13691 struct ix86_address parts;
13692 ok = ix86_decompose_address (x, &parts);
13693 gcc_assert (ok && parts.index == NULL_RTX);
13694 if (parts.base == NULL_RTX
13695 && (parts.disp == NULL_RTX
13696 || !symbolic_operand (parts.disp,
13697 GET_MODE (parts.disp))))
13698 fputs ("addr32 ", file);
13700 return;
13702 case '^':
13703 if (TARGET_64BIT && Pmode != word_mode)
13704 fputs ("addr32 ", file);
13705 return;
13707 case '!':
13708 if (ix86_notrack_prefixed_insn_p (current_output_insn))
13709 fputs ("notrack ", file);
13710 return;
13712 default:
13713 output_operand_lossage ("invalid operand code '%c'", code);
13717 if (REG_P (x))
13718 print_reg (x, code, file);
13720 else if (MEM_P (x))
13722 rtx addr = XEXP (x, 0);
13724 /* No `byte ptr' prefix for call instructions ... */
13725 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
13727 machine_mode mode = GET_MODE (x);
13728 const char *size;
13730 /* Check for explicit size override codes. */
13731 if (code == 'b')
13732 size = "BYTE";
13733 else if (code == 'w')
13734 size = "WORD";
13735 else if (code == 'k')
13736 size = "DWORD";
13737 else if (code == 'q')
13738 size = "QWORD";
13739 else if (code == 'x')
13740 size = "XMMWORD";
13741 else if (code == 't')
13742 size = "YMMWORD";
13743 else if (code == 'g')
13744 size = "ZMMWORD";
13745 else if (mode == BLKmode)
13746 /* ... or BLKmode operands, when not overridden. */
13747 size = NULL;
13748 else
13749 switch (GET_MODE_SIZE (mode))
13751 case 1: size = "BYTE"; break;
13752 case 2: size = "WORD"; break;
13753 case 4: size = "DWORD"; break;
13754 case 8: size = "QWORD"; break;
13755 case 12: size = "TBYTE"; break;
13756 case 16:
13757 if (mode == XFmode)
13758 size = "TBYTE";
13759 else
13760 size = "XMMWORD";
13761 break;
13762 case 32: size = "YMMWORD"; break;
13763 case 64: size = "ZMMWORD"; break;
13764 default:
13765 gcc_unreachable ();
13767 if (size)
13769 fputs (size, file);
13770 fputs (" PTR ", file);
13774 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
13775 output_operand_lossage ("invalid constraints for operand");
13776 else
13777 ix86_print_operand_address_as
13778 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
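/* With the size overrides above, a DImode memory operand prints as,
e.g., "QWORD PTR [rsp+8]" in Intel syntax, while AT&T syntax emits no
size prefix and the same operand prints as "8(%rsp)". */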
13781 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
13783 long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
13784 REAL_MODE_FORMAT (HFmode));
13785 if (ASSEMBLER_DIALECT == ASM_ATT)
13786 putc ('$', file);
13787 fprintf (file, "0x%04x", (unsigned int) l);
13790 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
13792 long l;
13794 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
13796 if (ASSEMBLER_DIALECT == ASM_ATT)
13797 putc ('$', file);
13798 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13799 if (code == 'q')
13800 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
13801 (unsigned long long) (int) l);
13802 else
13803 fprintf (file, "0x%08x", (unsigned int) l);
13806 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
13808 long l[2];
13810 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
13812 if (ASSEMBLER_DIALECT == ASM_ATT)
13813 putc ('$', file);
13814 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
13817 /* These float cases don't actually occur as immediate operands. */
13818 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
13820 char dstr[30];
13822 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13823 fputs (dstr, file);
13826 /* Print bcst_mem_operand. */
13827 else if (GET_CODE (x) == VEC_DUPLICATE)
13829 machine_mode vmode = GET_MODE (x);
13830 /* Must be a bcst_mem_operand. */
13831 gcc_assert (bcst_mem_operand (x, vmode));
13833 rtx mem = XEXP (x,0);
13834 ix86_print_operand (file, mem, 0);
13836 switch (vmode)
13838 case E_V2DImode:
13839 case E_V2DFmode:
13840 fputs ("{1to2}", file);
13841 break;
13842 case E_V4SImode:
13843 case E_V4SFmode:
13844 case E_V4DImode:
13845 case E_V4DFmode:
13846 fputs ("{1to4}", file);
13847 break;
13848 case E_V8SImode:
13849 case E_V8SFmode:
13850 case E_V8DFmode:
13851 case E_V8DImode:
13852 case E_V8HFmode:
13853 fputs ("{1to8}", file);
13854 break;
13855 case E_V16SFmode:
13856 case E_V16SImode:
13857 case E_V16HFmode:
13858 fputs ("{1to16}", file);
13859 break;
13860 case E_V32HFmode:
13861 fputs ("{1to32}", file);
13862 break;
13863 default:
13864 gcc_unreachable ();
13868 else
13870 /* We have patterns that allow zero sets of memory, for instance.
13871 In 64-bit mode, we should probably support all 8-byte vectors,
13872 since we can in fact encode that into an immediate. */
13873 if (GET_CODE (x) == CONST_VECTOR)
13875 if (x != CONST0_RTX (GET_MODE (x)))
13876 output_operand_lossage ("invalid vector immediate");
13877 x = const0_rtx;
13880 if (code == 'P')
13882 if (ix86_force_load_from_GOT_p (x, true))
13884 /* For inline assembly statement, load function address
13885 from GOT with 'P' operand modifier to avoid PLT. */
13886 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13887 (TARGET_64BIT
13888 ? UNSPEC_GOTPCREL
13889 : UNSPEC_GOT));
13890 x = gen_rtx_CONST (Pmode, x);
13891 x = gen_const_mem (Pmode, x);
13892 ix86_print_operand (file, x, 'A');
13893 return;
13896 else if (code != 'p')
13898 if (CONST_INT_P (x))
13900 if (ASSEMBLER_DIALECT == ASM_ATT)
13901 putc ('$', file);
13903 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13904 || GET_CODE (x) == LABEL_REF)
13906 if (ASSEMBLER_DIALECT == ASM_ATT)
13907 putc ('$', file);
13908 else
13909 fputs ("OFFSET FLAT:", file);
13912 if (CONST_INT_P (x))
13913 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13914 else if (flag_pic || MACHOPIC_INDIRECT)
13915 output_pic_addr_const (file, x, code);
13916 else
13917 output_addr_const (file, x);
13921 static bool
13922 ix86_print_operand_punct_valid_p (unsigned char code)
13924 return (code == '*' || code == '+' || code == '&' || code == ';'
13925 || code == '~' || code == '^' || code == '!');
13928 /* Print a memory operand whose address is ADDR. */
13930 static void
13931 ix86_print_operand_address_as (FILE *file, rtx addr,
13932 addr_space_t as, bool raw)
13934 struct ix86_address parts;
13935 rtx base, index, disp;
13936 int scale;
13937 int ok;
13938 bool vsib = false;
13939 int code = 0;
13941 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
13943 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
13944 gcc_assert (parts.index == NULL_RTX);
13945 parts.index = XVECEXP (addr, 0, 1);
13946 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
13947 addr = XVECEXP (addr, 0, 0);
13948 vsib = true;
13950 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
13952 gcc_assert (TARGET_64BIT);
13953 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
13954 code = 'q';
13956 else
13957 ok = ix86_decompose_address (addr, &parts);
13959 gcc_assert (ok);
13961 base = parts.base;
13962 index = parts.index;
13963 disp = parts.disp;
13964 scale = parts.scale;
13966 if (ADDR_SPACE_GENERIC_P (as))
13967 as = parts.seg;
13968 else
13969 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
13971 if (!ADDR_SPACE_GENERIC_P (as) && !raw)
13973 if (ASSEMBLER_DIALECT == ASM_ATT)
13974 putc ('%', file);
13976 switch (as)
13978 case ADDR_SPACE_SEG_FS:
13979 fputs ("fs:", file);
13980 break;
13981 case ADDR_SPACE_SEG_GS:
13982 fputs ("gs:", file);
13983 break;
13984 default:
13985 gcc_unreachable ();
13989 /* Use one byte shorter RIP relative addressing for 64bit mode. */
13990 if (TARGET_64BIT && !base && !index && !raw)
13992 rtx symbol = disp;
13994 if (GET_CODE (disp) == CONST
13995 && GET_CODE (XEXP (disp, 0)) == PLUS
13996 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13997 symbol = XEXP (XEXP (disp, 0), 0);
13999 if (GET_CODE (symbol) == LABEL_REF
14000 || (GET_CODE (symbol) == SYMBOL_REF
14001 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14002 base = pc_rtx;
14005 if (!base && !index)
14007 /* A displacement-only address requires special attention. */
14008 if (CONST_INT_P (disp))
14010 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
14011 fputs ("ds:", file);
14012 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14014 /* Load the external function address via the GOT slot to avoid PLT. */
14015 else if (GET_CODE (disp) == CONST
14016 && GET_CODE (XEXP (disp, 0)) == UNSPEC
14017 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
14018 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
14019 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
14020 output_pic_addr_const (file, disp, 0);
14021 else if (flag_pic)
14022 output_pic_addr_const (file, disp, 0);
14023 else
14024 output_addr_const (file, disp);
14026 else
14028 /* Print SImode register names to force addr32 prefix. */
14029 if (SImode_address_operand (addr, VOIDmode))
14031 if (flag_checking)
14033 gcc_assert (TARGET_64BIT);
14034 switch (GET_CODE (addr))
14036 case SUBREG:
14037 gcc_assert (GET_MODE (addr) == SImode);
14038 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
14039 break;
14040 case ZERO_EXTEND:
14041 case AND:
14042 gcc_assert (GET_MODE (addr) == DImode);
14043 break;
14044 default:
14045 gcc_unreachable ();
14048 gcc_assert (!code);
14049 code = 'k';
14051 else if (code == 0
14052 && TARGET_X32
14053 && disp
14054 && CONST_INT_P (disp)
14055 && INTVAL (disp) < -16*1024*1024)
14057 /* X32 runs in 64-bit mode, where displacement, DISP, in
14058 address DISP(%r64), is encoded as 32-bit immediate sign-
14059 extended from 32-bit to 64-bit. For -0x40000300(%r64),
14060 address is %r64 + 0xffffffffbffffd00. When %r64 <
14061 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
14062 which is invalid for x32. The correct address is %r64
14063 - 0x40000300 == 0xf7ffdd64. To properly encode
14064 -0x40000300(%r64) for x32, we zero-extend negative
14065 displacement by forcing addr32 prefix which truncates
14066 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
14067 zero-extend all negative displacements, including -1(%rsp).
14068 However, for small negative displacements, sign-extension
14069 won't cause overflow. We only zero-extend negative
14070 displacements if they are < -16*1024*1024, which is also used
14071 to check legitimate address displacements for PIC. */
14072 code = 'k';
14075 /* Since the upper 32 bits of RSP are always zero for x32,
14076 we can encode %esp as %rsp to avoid 0x67 prefix if
14077 there is no index register. */
14078 if (TARGET_X32 && Pmode == SImode
14079 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
14080 code = 'q';
14082 if (ASSEMBLER_DIALECT == ASM_ATT)
14084 if (disp)
14086 if (flag_pic)
14087 output_pic_addr_const (file, disp, 0);
14088 else if (GET_CODE (disp) == LABEL_REF)
14089 output_asm_label (disp);
14090 else
14091 output_addr_const (file, disp);
14094 putc ('(', file);
14095 if (base)
14096 print_reg (base, code, file);
14097 if (index)
14099 putc (',', file);
14100 print_reg (index, vsib ? 0 : code, file);
14101 if (scale != 1 || vsib)
14102 fprintf (file, ",%d", scale);
14104 putc (')', file);
14106 else
14108 rtx offset = NULL_RTX;
14110 if (disp)
14112 /* Pull out the offset of a symbol; print any symbol itself. */
14113 if (GET_CODE (disp) == CONST
14114 && GET_CODE (XEXP (disp, 0)) == PLUS
14115 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14117 offset = XEXP (XEXP (disp, 0), 1);
14118 disp = gen_rtx_CONST (VOIDmode,
14119 XEXP (XEXP (disp, 0), 0));
14122 if (flag_pic)
14123 output_pic_addr_const (file, disp, 0);
14124 else if (GET_CODE (disp) == LABEL_REF)
14125 output_asm_label (disp);
14126 else if (CONST_INT_P (disp))
14127 offset = disp;
14128 else
14129 output_addr_const (file, disp);
14132 putc ('[', file);
14133 if (base)
14135 print_reg (base, code, file);
14136 if (offset)
14138 if (INTVAL (offset) >= 0)
14139 putc ('+', file);
14140 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14143 else if (offset)
14144 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14145 else
14146 putc ('0', file);
14148 if (index)
14150 putc ('+', file);
14151 print_reg (index, vsib ? 0 : code, file);
14152 if (scale != 1 || vsib)
14153 fprintf (file, "*%d", scale);
14155 putc (']', file);
14160 static void
14161 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
14163 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
14164 output_operand_lossage ("invalid constraints for operand");
14165 else
14166 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
14169 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14171 static bool
14172 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14174 rtx op;
14176 if (GET_CODE (x) != UNSPEC)
14177 return false;
14179 op = XVECEXP (x, 0, 0);
14180 switch (XINT (x, 1))
14182 case UNSPEC_GOTOFF:
14183 output_addr_const (file, op);
14184 fputs ("@gotoff", file);
14185 break;
14186 case UNSPEC_GOTTPOFF:
14187 output_addr_const (file, op);
14188 /* FIXME: This might be @TPOFF in Sun ld. */
14189 fputs ("@gottpoff", file);
14190 break;
14191 case UNSPEC_TPOFF:
14192 output_addr_const (file, op);
14193 fputs ("@tpoff", file);
14194 break;
14195 case UNSPEC_NTPOFF:
14196 output_addr_const (file, op);
14197 if (TARGET_64BIT)
14198 fputs ("@tpoff", file);
14199 else
14200 fputs ("@ntpoff", file);
14201 break;
14202 case UNSPEC_DTPOFF:
14203 output_addr_const (file, op);
14204 fputs ("@dtpoff", file);
14205 break;
14206 case UNSPEC_GOTNTPOFF:
14207 output_addr_const (file, op);
14208 if (TARGET_64BIT)
14209 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14210 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14211 else
14212 fputs ("@gotntpoff", file);
14213 break;
14214 case UNSPEC_INDNTPOFF:
14215 output_addr_const (file, op);
14216 fputs ("@indntpoff", file);
14217 break;
14218 #if TARGET_MACHO
14219 case UNSPEC_MACHOPIC_OFFSET:
14220 output_addr_const (file, op);
14221 putc ('-', file);
14222 machopic_output_function_base_name (file);
14223 break;
14224 #endif
14226 default:
14227 return false;
14230 return true;
14234 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14235 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14236 is the expression of the binary operation. The output may either be
14237 emitted here, or returned to the caller, like all output_* functions.
14239 There is no guarantee that the operands are the same mode, as they
14240 might be within FLOAT or FLOAT_EXTEND expressions. */
14242 #ifndef SYSV386_COMPAT
14243 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14244 wants to fix the assemblers because that causes incompatibility
14245 with gcc. No-one wants to fix gcc because that causes
14246 incompatibility with assemblers... You can use the option of
14247 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14248 #define SYSV386_COMPAT 1
14249 #endif
14251 const char *
14252 output_387_binary_op (rtx_insn *insn, rtx *operands)
14254 static char buf[40];
14255 const char *p;
14256 bool is_sse
14257 = (SSE_REG_P (operands[0])
14258 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
14260 if (is_sse)
14261 p = "%v";
14262 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14263 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14264 p = "fi";
14265 else
14266 p = "f";
14268 strcpy (buf, p);
14270 switch (GET_CODE (operands[3]))
14272 case PLUS:
14273 p = "add"; break;
14274 case MINUS:
14275 p = "sub"; break;
14276 case MULT:
14277 p = "mul"; break;
14278 case DIV:
14279 p = "div"; break;
14280 default:
14281 gcc_unreachable ();
14284 strcat (buf, p);
14286 if (is_sse)
14288 p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd";
14289 strcat (buf, p);
14291 if (TARGET_AVX)
14292 p = "\t{%2, %1, %0|%0, %1, %2}";
14293 else
14294 p = "\t{%2, %0|%0, %2}";
14296 strcat (buf, p);
14297 return buf;
14300 /* Even if we do not want to check the inputs, this documents the input
14301 constraints, which helps in understanding the following code. */
14302 if (flag_checking)
14304 if (STACK_REG_P (operands[0])
14305 && ((REG_P (operands[1])
14306 && REGNO (operands[0]) == REGNO (operands[1])
14307 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14308 || (REG_P (operands[2])
14309 && REGNO (operands[0]) == REGNO (operands[2])
14310 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14311 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14312 ; /* ok */
14313 else
14314 gcc_unreachable ();
14317 switch (GET_CODE (operands[3]))
14319 case MULT:
14320 case PLUS:
14321 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14322 std::swap (operands[1], operands[2]);
14324 /* We know operands[0] == operands[1]. */
14326 if (MEM_P (operands[2]))
14328 p = "%Z2\t%2";
14329 break;
14332 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14334 if (STACK_TOP_P (operands[0]))
14335 /* How is it that we are storing to a dead operand[2]?
14336 Well, presumably operands[1] is dead too. We can't
14337 store the result to st(0) as st(0) gets popped on this
14338 instruction. Instead store to operands[2] (which I
14339 think has to be st(1)). st(1) will be popped later.
14340 gcc <= 2.8.1 didn't have this check and generated
14341 assembly code that the Unixware assembler rejected. */
14342 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14343 else
14344 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14345 break;
14348 if (STACK_TOP_P (operands[0]))
14349 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14350 else
14351 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14352 break;
14354 case MINUS:
14355 case DIV:
14356 if (MEM_P (operands[1]))
14358 p = "r%Z1\t%1";
14359 break;
14362 if (MEM_P (operands[2]))
14364 p = "%Z2\t%2";
14365 break;
14368 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14370 #if SYSV386_COMPAT
14371 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14372 derived assemblers, confusingly reverse the direction of
14373 the operation for fsub{r} and fdiv{r} when the
14374 destination register is not st(0). The Intel assembler
14375 doesn't have this brain damage. Read !SYSV386_COMPAT to
14376 figure out what the hardware really does. */
14377 if (STACK_TOP_P (operands[0]))
14378 p = "{p\t%0, %2|rp\t%2, %0}";
14379 else
14380 p = "{rp\t%2, %0|p\t%0, %2}";
14381 #else
14382 if (STACK_TOP_P (operands[0]))
14383 /* As above for fmul/fadd, we can't store to st(0). */
14384 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14385 else
14386 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14387 #endif
14388 break;
14391 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14393 #if SYSV386_COMPAT
14394 if (STACK_TOP_P (operands[0]))
14395 p = "{rp\t%0, %1|p\t%1, %0}";
14396 else
14397 p = "{p\t%1, %0|rp\t%0, %1}";
14398 #else
14399 if (STACK_TOP_P (operands[0]))
14400 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14401 else
14402 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14403 #endif
14404 break;
14407 if (STACK_TOP_P (operands[0]))
14409 if (STACK_TOP_P (operands[1]))
14410 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14411 else
14412 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14413 break;
14415 else if (STACK_TOP_P (operands[1]))
14417 #if SYSV386_COMPAT
14418 p = "{\t%1, %0|r\t%0, %1}";
14419 #else
14420 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14421 #endif
14423 else
14425 #if SYSV386_COMPAT
14426 p = "{r\t%2, %0|\t%0, %2}";
14427 #else
14428 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14429 #endif
14431 break;
14433 default:
14434 gcc_unreachable ();
14437 strcat (buf, p);
14438 return buf;
14441 /* Return needed mode for entity in optimize_mode_switching pass. */
14443 static int
14444 ix86_dirflag_mode_needed (rtx_insn *insn)
14446 if (CALL_P (insn))
14448 if (cfun->machine->func_type == TYPE_NORMAL)
14449 return X86_DIRFLAG_ANY;
14450 else
14451 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
14452 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
14455 if (recog_memoized (insn) < 0)
14456 return X86_DIRFLAG_ANY;
14458 if (get_attr_type (insn) == TYPE_STR)
14460 /* Emit cld instruction if stringops are used in the function. */
14461 if (cfun->machine->func_type == TYPE_NORMAL)
14462 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
14463 else
14464 return X86_DIRFLAG_RESET;
14467 return X86_DIRFLAG_ANY;
14470 /* Check if a 256bit or 512bit AVX register is referenced inside EXP. */
14472 static bool
14473 ix86_check_avx_upper_register (const_rtx exp)
14475 return (SSE_REG_P (exp)
14476 && !EXT_REX_SSE_REG_P (exp)
14477 && GET_MODE_BITSIZE (GET_MODE (exp)) > 128);
14480 /* Check if a 256bit or 512bit AVX register is referenced in stores. */
14482 static void
14483 ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
14485 if (ix86_check_avx_upper_register (dest))
14487 bool *used = (bool *) data;
14488 *used = true;
14492 /* Return needed mode for entity in optimize_mode_switching pass. */
14494 static int
14495 ix86_avx_u128_mode_needed (rtx_insn *insn)
14497 if (DEBUG_INSN_P (insn))
14498 return AVX_U128_ANY;
14500 if (CALL_P (insn))
14502 rtx link;
14504 /* Needed mode is set to AVX_U128_CLEAN if there are
14505 no 256bit or 512bit modes used in function arguments. */
14506 for (link = CALL_INSN_FUNCTION_USAGE (insn);
14507 link;
14508 link = XEXP (link, 1))
14510 if (GET_CODE (XEXP (link, 0)) == USE)
14512 rtx arg = XEXP (XEXP (link, 0), 0);
14514 if (ix86_check_avx_upper_register (arg))
14515 return AVX_U128_DIRTY;
14519 /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit
14520 or 512bit registers used in the function return register. */
14521 bool avx_upper_reg_found = false;
14522 note_stores (insn, ix86_check_avx_upper_stores,
14523 &avx_upper_reg_found);
14524 if (avx_upper_reg_found)
14525 return AVX_U128_DIRTY;
14527 /* If the function is known to preserve some SSE registers,
14528 RA and previous passes can legitimately rely on that for
14529 modes wider than 256 bits. It's only safe to issue a
14530 vzeroupper if all SSE registers are clobbered. */
14531 const function_abi &abi = insn_callee_abi (insn);
14532 if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
14533 || !hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
14534 abi.mode_clobbers (V4DImode)))
14535 return AVX_U128_ANY;
14537 return AVX_U128_CLEAN;
14540 subrtx_iterator::array_type array;
14542 rtx set = single_set (insn);
14543 if (set)
14545 rtx dest = SET_DEST (set);
14546 rtx src = SET_SRC (set);
14547 if (ix86_check_avx_upper_register (dest))
14549 /* This is a YMM/ZMM load. Return AVX_U128_DIRTY if the
14550 source isn't zero. */
14551 if (standard_sse_constant_p (src, GET_MODE (dest)) != 1)
14552 return AVX_U128_DIRTY;
14553 else
14554 return AVX_U128_ANY;
14556 else
14558 FOR_EACH_SUBRTX (iter, array, src, NONCONST)
14559 if (ix86_check_avx_upper_register (*iter))
14560 return AVX_U128_DIRTY;
14563 /* This isn't YMM/ZMM load/store. */
14564 return AVX_U128_ANY;
14567 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
14568 Hardware changes state only when a 256bit register is written to,
14569 but we need to prevent the compiler from moving the optimal insertion
14570 point above an eventual read from a 256bit or 512bit register. */
14571 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
14572 if (ix86_check_avx_upper_register (*iter))
14573 return AVX_U128_DIRTY;
14575 return AVX_U128_ANY;
14578 /* Return mode that i387 must be switched into
14579 prior to the execution of insn. */
14581 static int
14582 ix86_i387_mode_needed (int entity, rtx_insn *insn)
14584 enum attr_i387_cw mode;
14586 /* The mode UNINITIALIZED is used to store control word after a
14587 function call or ASM pattern. The mode ANY specifies that the function
14588 has no requirements on the control word and makes no changes in the
14589 bits we are interested in. */
14591 if (CALL_P (insn)
14592 || (NONJUMP_INSN_P (insn)
14593 && (asm_noperands (PATTERN (insn)) >= 0
14594 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14595 return I387_CW_UNINITIALIZED;
14597 if (recog_memoized (insn) < 0)
14598 return I387_CW_ANY;
14600 mode = get_attr_i387_cw (insn);
14602 switch (entity)
14604 case I387_ROUNDEVEN:
14605 if (mode == I387_CW_ROUNDEVEN)
14606 return mode;
14607 break;
14609 case I387_TRUNC:
14610 if (mode == I387_CW_TRUNC)
14611 return mode;
14612 break;
14614 case I387_FLOOR:
14615 if (mode == I387_CW_FLOOR)
14616 return mode;
14617 break;
14619 case I387_CEIL:
14620 if (mode == I387_CW_CEIL)
14621 return mode;
14622 break;
14624 default:
14625 gcc_unreachable ();
14628 return I387_CW_ANY;
14631 /* Return mode that entity must be switched into
14632 prior to the execution of insn. */
14634 static int
14635 ix86_mode_needed (int entity, rtx_insn *insn)
14637 switch (entity)
14639 case X86_DIRFLAG:
14640 return ix86_dirflag_mode_needed (insn);
14641 case AVX_U128:
14642 return ix86_avx_u128_mode_needed (insn);
14643 case I387_ROUNDEVEN:
14644 case I387_TRUNC:
14645 case I387_FLOOR:
14646 case I387_CEIL:
14647 return ix86_i387_mode_needed (entity, insn);
14648 default:
14649 gcc_unreachable ();
14651 return 0;
14654 /* Calculate mode of upper 128bit AVX registers after the insn. */
14656 static int
14657 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
14659 rtx pat = PATTERN (insn);
14661 if (vzeroupper_pattern (pat, VOIDmode)
14662 || vzeroall_pattern (pat, VOIDmode))
14663 return AVX_U128_CLEAN;
14665 /* We know that state is clean after CALL insn if there are no
14666 256bit or 512bit registers used in the function return register. */
14667 if (CALL_P (insn))
14669 bool avx_upper_reg_found = false;
14670 note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
14672 return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
14675 /* Otherwise, return current mode. Remember that if insn
14676 references AVX 256bit or 512bit registers, the mode was already
14677 changed to DIRTY from MODE_NEEDED. */
14678 return mode;
14681 /* Return the mode that an insn results in. */
14683 static int
14684 ix86_mode_after (int entity, int mode, rtx_insn *insn)
14686 switch (entity)
14688 case X86_DIRFLAG:
14689 return mode;
14690 case AVX_U128:
14691 return ix86_avx_u128_mode_after (mode, insn);
14692 case I387_ROUNDEVEN:
14693 case I387_TRUNC:
14694 case I387_FLOOR:
14695 case I387_CEIL:
14696 return mode;
14697 default:
14698 gcc_unreachable ();
14702 static int
14703 ix86_dirflag_mode_entry (void)
14705 /* For TARGET_CLD or in the interrupt handler we can't assume
14706 direction flag state at function entry. */
14707 if (TARGET_CLD
14708 || cfun->machine->func_type != TYPE_NORMAL)
14709 return X86_DIRFLAG_ANY;
14711 return X86_DIRFLAG_RESET;
14714 static int
14715 ix86_avx_u128_mode_entry (void)
14717 tree arg;
14719 /* Entry mode is set to AVX_U128_DIRTY if there are
14720 256bit or 512bit modes used in function arguments. */
14721 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
14722 arg = TREE_CHAIN (arg))
14724 rtx incoming = DECL_INCOMING_RTL (arg);
14726 if (incoming && ix86_check_avx_upper_register (incoming))
14727 return AVX_U128_DIRTY;
14730 return AVX_U128_CLEAN;
14733 /* Return a mode that ENTITY is assumed to be
14734 switched to at function entry. */
14736 static int
14737 ix86_mode_entry (int entity)
14739 switch (entity)
14741 case X86_DIRFLAG:
14742 return ix86_dirflag_mode_entry ();
14743 case AVX_U128:
14744 return ix86_avx_u128_mode_entry ();
14745 case I387_ROUNDEVEN:
14746 case I387_TRUNC:
14747 case I387_FLOOR:
14748 case I387_CEIL:
14749 return I387_CW_ANY;
14750 default:
14751 gcc_unreachable ();
14755 static int
14756 ix86_avx_u128_mode_exit (void)
14758 rtx reg = crtl->return_rtx;
14760 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
14761 or 512bit modes used in the function return register. */
14762 if (reg && ix86_check_avx_upper_register (reg))
14763 return AVX_U128_DIRTY;
14765 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
14766 modes used in function arguments, otherwise return AVX_U128_CLEAN.
14768 return ix86_avx_u128_mode_entry ();
14771 /* Return a mode that ENTITY is assumed to be
14772 switched to at function exit. */
14774 static int
14775 ix86_mode_exit (int entity)
14777 switch (entity)
14779 case X86_DIRFLAG:
14780 return X86_DIRFLAG_ANY;
14781 case AVX_U128:
14782 return ix86_avx_u128_mode_exit ();
14783 case I387_ROUNDEVEN:
14784 case I387_TRUNC:
14785 case I387_FLOOR:
14786 case I387_CEIL:
14787 return I387_CW_ANY;
14788 default:
14789 gcc_unreachable ();
14793 static int
14794 ix86_mode_priority (int, int n)
14796 return n;
14799 /* Output code to initialize control word copies used by trunc?f?i and
14800 rounding patterns. MODE selects the new rounding mode; the current
14801 control word is saved in SLOT_CW_STORED and the adjusted copy in a mode-specific slot. */
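/* The x87 rounding-control field occupies bits 11:10 of the control word:
00 = round to nearest (even), 01 = round down, 10 = round up,
11 = round toward zero; hence the 0x0c00/0x0400/0x0800 masks below. */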
14803 static void
14804 emit_i387_cw_initialization (int mode)
14806 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14807 rtx new_mode;
14809 enum ix86_stack_slot slot;
14811 rtx reg = gen_reg_rtx (HImode);
14813 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14814 emit_move_insn (reg, copy_rtx (stored_mode));
14816 switch (mode)
14818 case I387_CW_ROUNDEVEN:
14819 /* round to nearest (even) */
14820 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14821 slot = SLOT_CW_ROUNDEVEN;
14822 break;
14824 case I387_CW_TRUNC:
14825 /* round toward zero (truncate) */
14826 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14827 slot = SLOT_CW_TRUNC;
14828 break;
14830 case I387_CW_FLOOR:
14831 /* round down toward -oo */
14832 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14833 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14834 slot = SLOT_CW_FLOOR;
14835 break;
14837 case I387_CW_CEIL:
14838 /* round up toward +oo */
14839 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14840 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14841 slot = SLOT_CW_CEIL;
14842 break;
14844 default:
14845 gcc_unreachable ();
14848 gcc_assert (slot < MAX_386_STACK_LOCALS);
14850 new_mode = assign_386_stack_local (HImode, slot);
14851 emit_move_insn (new_mode, reg);
14854 /* Generate one or more insns to set ENTITY to MODE. */
14856 static void
14857 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
14858 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
14860 switch (entity)
14862 case X86_DIRFLAG:
14863 if (mode == X86_DIRFLAG_RESET)
14864 emit_insn (gen_cld ());
14865 break;
14866 case AVX_U128:
14867 if (mode == AVX_U128_CLEAN)
14868 ix86_expand_avx_vzeroupper ();
14869 break;
14870 case I387_ROUNDEVEN:
14871 case I387_TRUNC:
14872 case I387_FLOOR:
14873 case I387_CEIL:
14874 if (mode != I387_CW_ANY
14875 && mode != I387_CW_UNINITIALIZED)
14876 emit_i387_cw_initialization (mode);
14877 break;
14878 default:
14879 gcc_unreachable ();
14883 /* Output code for INSN to convert a float to a signed int. OPERANDS
14884 are the insn operands. The output may be [HSD]Imode and the input
14885 operand may be [SDX]Fmode. */
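/* A typical non-FISTTP conversion below is thus emitted roughly as
"fldcw %3" (switch to the required rounding mode), "fistp %0",
"fldcw %2" (restore the saved control word). */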
14887 const char *
14888 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
14890 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
14891 bool dimode_p = GET_MODE (operands[0]) == DImode;
14892 int round_mode = get_attr_i387_cw (insn);
14894 static char buf[40];
14895 const char *p;
14897 /* Jump through a hoop or two for DImode, since the hardware has no
14898 non-popping instruction. We used to do this a different way, but
14899 that was somewhat fragile and broke with post-reload splitters. */
14900 if ((dimode_p || fisttp) && !stack_top_dies)
14901 output_asm_insn ("fld\t%y1", operands);
14903 gcc_assert (STACK_TOP_P (operands[1]));
14904 gcc_assert (MEM_P (operands[0]));
14905 gcc_assert (GET_MODE (operands[1]) != TFmode);
14907 if (fisttp)
14908 return "fisttp%Z0\t%0";
14910 strcpy (buf, "fist");
14912 if (round_mode != I387_CW_ANY)
14913 output_asm_insn ("fldcw\t%3", operands);
14915 p = "p%Z0\t%0";
14916 strcat (buf, p + !(stack_top_dies || dimode_p));
14918 output_asm_insn (buf, operands);
14920 if (round_mode != I387_CW_ANY)
14921 output_asm_insn ("fldcw\t%2", operands);
14923 return "";
14926 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14927 have the values zero or one, indicates the ffreep insn's operand
14928 from the OPERANDS array. */
14930 static const char *
14931 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14933 if (TARGET_USE_FFREEP)
14934 #ifdef HAVE_AS_IX86_FFREEP
14935 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14936 #else
14938 static char retval[32];
14939 int regno = REGNO (operands[opno]);
14941 gcc_assert (STACK_REGNO_P (regno));
14943 regno -= FIRST_STACK_REG;
14945 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14946 return retval;
14948 #endif
14950 return opno ? "fstp\t%y1" : "fstp\t%y0";
14954 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14955 should be used. UNORDERED_P is true when fucom should be used. */
14957 const char *
14958 output_fp_compare (rtx_insn *insn, rtx *operands,
14959 bool eflags_p, bool unordered_p)
14961 rtx *xops = eflags_p ? &operands[0] : &operands[1];
14962 bool stack_top_dies;
14964 static char buf[40];
14965 const char *p;
14967 gcc_assert (STACK_TOP_P (xops[0]));
14969 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
14971 if (eflags_p)
14973 p = unordered_p ? "fucomi" : "fcomi";
14974 strcpy (buf, p);
14976 p = "p\t{%y1, %0|%0, %y1}";
14977 strcat (buf, p + !stack_top_dies);
14979 return buf;
14982 if (STACK_REG_P (xops[1])
14983 && stack_top_dies
14984 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
14986 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
14988 /* If the top of the 387 stack dies, and the other operand
14989 is also a stack register that dies, then this must be a
14990 `fcompp' float compare. */
14991 p = unordered_p ? "fucompp" : "fcompp";
14992 strcpy (buf, p);
14994 else if (const0_operand (xops[1], VOIDmode))
14996 gcc_assert (!unordered_p);
14997 strcpy (buf, "ftst");
14999 else
15001 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
15003 gcc_assert (!unordered_p);
15004 p = "ficom";
15006 else
15007 p = unordered_p ? "fucom" : "fcom";
15009 strcpy (buf, p);
15011 p = "p%Z2\t%y2";
15012 strcat (buf, p + !stack_top_dies);
15015 output_asm_insn (buf, operands);
15016 return "fnstsw\t%0";
15019 void
15020 ix86_output_addr_vec_elt (FILE *file, int value)
15022 const char *directive = ASM_LONG;
15024 #ifdef ASM_QUAD
15025 if (TARGET_LP64)
15026 directive = ASM_QUAD;
15027 #else
15028 gcc_assert (!TARGET_64BIT);
15029 #endif
15031 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15034 void
15035 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15037 const char *directive = ASM_LONG;
15039 #ifdef ASM_QUAD
15040 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15041 directive = ASM_QUAD;
15042 #else
15043 gcc_assert (!TARGET_64BIT);
15044 #endif
15045 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15046 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15047 fprintf (file, "%s%s%d-%s%d\n",
15048 directive, LPREFIX, value, LPREFIX, rel);
15049 #if TARGET_MACHO
15050 else if (TARGET_MACHO)
15052 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15053 machopic_output_function_base_name (file);
15054 putc ('\n', file);
15056 #endif
15057 else if (HAVE_AS_GOTOFF_IN_DATA)
15058 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15059 else
15060 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15061 GOT_SYMBOL_NAME, LPREFIX, value);
15064 #define LEA_MAX_STALL (3)
15065 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
15067 /* Increase given DISTANCE in half-cycles according to
15068 dependencies between PREV and NEXT instructions.
15069 Add 1 half-cycle if there is no dependency and
15070 go to the next cycle if there is some dependency. */
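/* For example, with DISTANCE == 3: a dependency between PREV and NEXT
gives 3 + (3 & 1) + 2 == 6 (start of the next full cycle), while no
dependency gives 3 + 1 == 4. */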
15072 static unsigned int
15073 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
15075 df_ref def, use;
15077 if (!prev || !next)
15078 return distance + (distance & 1) + 2;
15080 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
15081 return distance + 1;
15083 FOR_EACH_INSN_USE (use, next)
15084 FOR_EACH_INSN_DEF (def, prev)
15085 if (!DF_REF_IS_ARTIFICIAL (def)
15086 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
15087 return distance + (distance & 1) + 2;
15089 return distance + 1;
15092 /* Function checks if instruction INSN defines register number
15093 REGNO1 or REGNO2. */
15095 bool
15096 insn_defines_reg (unsigned int regno1, unsigned int regno2,
15097 rtx_insn *insn)
15099 df_ref def;
15101 FOR_EACH_INSN_DEF (def, insn)
15102 if (DF_REF_REG_DEF_P (def)
15103 && !DF_REF_IS_ARTIFICIAL (def)
15104 && (regno1 == DF_REF_REGNO (def)
15105 || regno2 == DF_REF_REGNO (def)))
15106 return true;
15108 return false;
15111 /* Function checks if instruction INSN uses register number
15112 REGNO as part of an address expression. */
15114 static bool
15115 insn_uses_reg_mem (unsigned int regno, rtx insn)
15117 df_ref use;
15119 FOR_EACH_INSN_USE (use, insn)
15120 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
15121 return true;
15123 return false;
15126 /* Search backward for non-agu definition of register number REGNO1
15127 or register number REGNO2 in basic block starting from instruction
15128 START up to head of basic block or instruction INSN.
15130 The function sets *FOUND to true if a definition was found
15131 and to false otherwise.
15133 Distance in half-cycles between START and found instruction or head
15134 of BB is added to DISTANCE and returned. */
15136 static int
15137 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
15138 rtx_insn *insn, int distance,
15139 rtx_insn *start, bool *found)
15141 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
15142 rtx_insn *prev = start;
15143 rtx_insn *next = NULL;
15145 *found = false;
15147 while (prev
15148 && prev != insn
15149 && distance < LEA_SEARCH_THRESHOLD)
15151 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
15153 distance = increase_distance (prev, next, distance);
15154 if (insn_defines_reg (regno1, regno2, prev))
15156 if (recog_memoized (prev) < 0
15157 || get_attr_type (prev) != TYPE_LEA)
15159 *found = true;
15160 return distance;
15164 next = prev;
15166 if (prev == BB_HEAD (bb))
15167 break;
15169 prev = PREV_INSN (prev);
15172 return distance;
15175 /* Search backward for non-agu definition of register number REGNO1
15176 or register number REGNO2 in INSN's basic block until
15177 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15178 2. Reach neighbor BBs boundary, or
15179 3. Reach agu definition.
15180 Returns the distance between the non-agu definition point and INSN.
15181 If no definition point, returns -1. */
15183 static int
15184 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15185 rtx_insn *insn)
15187 basic_block bb = BLOCK_FOR_INSN (insn);
15188 int distance = 0;
15189 bool found = false;
15191 if (insn != BB_HEAD (bb))
15192 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
15193 distance, PREV_INSN (insn),
15194 &found);
15196 if (!found && distance < LEA_SEARCH_THRESHOLD)
15198 edge e;
15199 edge_iterator ei;
15200 bool simple_loop = false;
15202 FOR_EACH_EDGE (e, ei, bb->preds)
15203 if (e->src == bb)
15205 simple_loop = true;
15206 break;
15209 if (simple_loop)
15210 distance = distance_non_agu_define_in_bb (regno1, regno2,
15211 insn, distance,
15212 BB_END (bb), &found);
15213 else
15215 int shortest_dist = -1;
15216 bool found_in_bb = false;
15218 FOR_EACH_EDGE (e, ei, bb->preds)
15220 int bb_dist
15221 = distance_non_agu_define_in_bb (regno1, regno2,
15222 insn, distance,
15223 BB_END (e->src),
15224 &found_in_bb);
15225 if (found_in_bb)
15227 if (shortest_dist < 0)
15228 shortest_dist = bb_dist;
15229 else if (bb_dist > 0)
15230 shortest_dist = MIN (bb_dist, shortest_dist);
15232 found = true;
15236 distance = shortest_dist;
15240 if (!found)
15241 return -1;
15243 return distance >> 1;
15246 /* Return the distance in half-cycles between INSN and the next
15247 insn that uses register number REGNO in a memory address, added
15248 to DISTANCE. Return -1 if REGNO is set.
15250 Put true value into *FOUND if register usage was found and
15251 false otherwise.
15252 Put true value into *REDEFINED if register redefinition was
15253 found and false otherwise. */
15255 static int
15256 distance_agu_use_in_bb (unsigned int regno,
15257 rtx_insn *insn, int distance, rtx_insn *start,
15258 bool *found, bool *redefined)
15260 basic_block bb = NULL;
15261 rtx_insn *next = start;
15262 rtx_insn *prev = NULL;
15264 *found = false;
15265 *redefined = false;
15267 if (start != NULL_RTX)
15269 bb = BLOCK_FOR_INSN (start);
15270 if (start != BB_HEAD (bb))
15271 /* If insn and start belong to the same bb, set prev to insn,
15272 so the call to increase_distance will increase the distance
15273 between insns by 1. */
15274 prev = insn;
15277 while (next
15278 && next != insn
15279 && distance < LEA_SEARCH_THRESHOLD)
15281 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
15283 distance = increase_distance(prev, next, distance);
15284 if (insn_uses_reg_mem (regno, next))
15286 /* Return DISTANCE if OP0 is used in memory
15287 address in NEXT. */
15288 *found = true;
15289 return distance;
15292 if (insn_defines_reg (regno, INVALID_REGNUM, next))
15294 /* Return -1 if OP0 is set in NEXT. */
15295 *redefined = true;
15296 return -1;
15299 prev = next;
15302 if (next == BB_END (bb))
15303 break;
15305 next = NEXT_INSN (next);
15308 return distance;
15311 /* Return the distance between INSN and the next insn that uses
15312 register number REGNO0 in a memory address. Return -1 if no such
15313 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
15315 static int
15316 distance_agu_use (unsigned int regno0, rtx_insn *insn)
15318 basic_block bb = BLOCK_FOR_INSN (insn);
15319 int distance = 0;
15320 bool found = false;
15321 bool redefined = false;
15323 if (insn != BB_END (bb))
15324 distance = distance_agu_use_in_bb (regno0, insn, distance,
15325 NEXT_INSN (insn),
15326 &found, &redefined);
15328 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
15330 edge e;
15331 edge_iterator ei;
15332 bool simple_loop = false;
15334 FOR_EACH_EDGE (e, ei, bb->succs)
15335 if (e->dest == bb)
15337 simple_loop = true;
15338 break;
15341 if (simple_loop)
15342 distance = distance_agu_use_in_bb (regno0, insn,
15343 distance, BB_HEAD (bb),
15344 &found, &redefined);
15345 else
15347 int shortest_dist = -1;
15348 bool found_in_bb = false;
15349 bool redefined_in_bb = false;
15351 FOR_EACH_EDGE (e, ei, bb->succs)
15353 int bb_dist
15354 = distance_agu_use_in_bb (regno0, insn,
15355 distance, BB_HEAD (e->dest),
15356 &found_in_bb, &redefined_in_bb);
15357 if (found_in_bb)
15359 if (shortest_dist < 0)
15360 shortest_dist = bb_dist;
15361 else if (bb_dist > 0)
15362 shortest_dist = MIN (bb_dist, shortest_dist);
15364 found = true;
15368 distance = shortest_dist;
15372 if (!found || redefined)
15373 return -1;
15375 return distance >> 1;
15378 /* Define this macro to tune LEA priority vs ADD; it takes effect when
15379 there is a dilemma of choosing LEA or ADD:
15380 Negative value: ADD is preferred over LEA
15381 Zero: Neutral
15382 Positive value: LEA is preferred over ADD. */
15383 #define IX86_LEA_PRIORITY 0
15385 /* Return true if usage of lea INSN has a performance advantage
15386 over a sequence of instructions. The instruction sequence has
15387 SPLIT_COST cycles higher latency than the lea latency. */
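/* As a sketch of the alternative being costed, "lea 4(%rbx,%rax,2), %rcx"
could be replaced by something like "mov %rax, %rcx; add %rcx, %rcx;
add %rbx, %rcx; add $4, %rcx"; the actual splitter may choose a
different sequence. */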
15389 static bool
15390 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
15391 unsigned int regno2, int split_cost, bool has_scale)
15393 int dist_define, dist_use;
15395 /* For Atom processors newer than Bonnell, if using a 2-source or
15396 3-source LEA for non-destructive destination purposes, or due to
15397 wanting the ability to use SCALE, the use of LEA is justified. */
15398 if (!TARGET_CPU_P (BONNELL))
15400 if (has_scale)
15401 return true;
15402 if (split_cost < 1)
15403 return false;
15404 if (regno0 == regno1 || regno0 == regno2)
15405 return false;
15406 return true;
15409 /* Remember recog_data content. */
15410 struct recog_data_d recog_data_save = recog_data;
15412 dist_define = distance_non_agu_define (regno1, regno2, insn);
15413 dist_use = distance_agu_use (regno0, insn);
15415 /* distance_non_agu_define can call get_attr_type which can call
15416 recog_memoized, restore recog_data back to previous content. */
15417 recog_data = recog_data_save;
15419 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
15421 /* If there is no non-AGU operand definition, no AGU
15422 operand usage and the split cost is 0, then both the lea
15423 and non-lea variants have the same priority. Currently
15424 we prefer lea for 64-bit code and non-lea for 32-bit
15425 code. */
15426 if (dist_use < 0 && split_cost == 0)
15427 return TARGET_64BIT || IX86_LEA_PRIORITY;
15428 else
15429 return true;
15432 /* With a longer definition distance, lea is preferable.
15433 Here we adjust the distance to take the splitting cost and
15434 lea priority into account. */
15435 dist_define += split_cost + IX86_LEA_PRIORITY;
15437 /* If there is no use in a memory address, then we just check
15438 that the split cost exceeds the AGU stall. */
15439 if (dist_use < 0)
15440 return dist_define > LEA_MAX_STALL;
15442 /* If this insn has both backward non-agu dependence and forward
15443 agu dependence, the one with the shorter distance takes effect. */
15444 return dist_define >= dist_use;
15447 /* Return true if we need to split op0 = op1 + op2 into a sequence of
15448 move and add to avoid AGU stalls. */
15450 bool
15451 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
15453 unsigned int regno0, regno1, regno2;
15455 /* Check if we need to optimize. */
15456 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15457 return false;
15459 regno0 = true_regnum (operands[0]);
15460 regno1 = true_regnum (operands[1]);
15461 regno2 = true_regnum (operands[2]);
15463 /* We need to split only adds with a non-destructive
15464 destination operand. */
15465 if (regno0 == regno1 || regno0 == regno2)
15466 return false;
15467 else
15468 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
15471 /* Return true if we should emit lea instruction instead of mov
15472 instruction. */
15474 bool
15475 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
15477 unsigned int regno0, regno1;
15479 /* Check if we need to optimize. */
15480 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15481 return false;
15483 /* Use lea for reg to reg moves only. */
15484 if (!REG_P (operands[0]) || !REG_P (operands[1]))
15485 return false;
15487 regno0 = true_regnum (operands[0]);
15488 regno1 = true_regnum (operands[1]);
15490 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
15493 /* Return true if we need to split lea into a sequence of
15494 instructions to avoid AGU stalls during peephole2. */
15496 bool
15497 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
15499 unsigned int regno0, regno1, regno2;
15500 int split_cost;
15501 struct ix86_address parts;
15502 int ok;
15504 /* The "at least two components" test below might not catch simple
15505 move or zero extension insns if parts.base is non-NULL and parts.disp
15506 is const0_rtx as the only components in the address, e.g. if the
15507 register is %rbp or %r13. As this test is much cheaper and moves or
15508 zero extensions are the common case, do this check first. */
15509 if (REG_P (operands[1])
15510 || (SImode_address_operand (operands[1], VOIDmode)
15511 && REG_P (XEXP (operands[1], 0))))
15512 return false;
15514 ok = ix86_decompose_address (operands[1], &parts);
15515 gcc_assert (ok);
15517 /* There should be at least two components in the address. */
15518 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
15519 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
15520 return false;
15522 /* We should not split into add if a non-legitimate PIC
15523 operand is used as the displacement. */
15524 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
15525 return false;
15527 regno0 = true_regnum (operands[0]);
15528 regno1 = INVALID_REGNUM;
15529 regno2 = INVALID_REGNUM;
15531 if (parts.base)
15532 regno1 = true_regnum (parts.base);
15533 if (parts.index)
15534 regno2 = true_regnum (parts.index);
15536 /* Use add for a = a + b and a = b + a since it is faster and shorter
15537 than lea for most processors. For the processors like BONNELL, if
15538 the destination register of LEA holds an actual address which will
15539 be used soon, LEA is better and otherwise ADD is better. */
15540 if (!TARGET_CPU_P (BONNELL)
15541 && parts.scale == 1
15542 && (!parts.disp || parts.disp == const0_rtx)
15543 && (regno0 == regno1 || regno0 == regno2))
15544 return true;
15546 /* Check we need to optimize. */
15547 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
15548 return false;
15550 split_cost = 0;
15552 /* Compute how many cycles we will add to execution time
15553 if we split the lea into a sequence of instructions. */
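/* For example, for "lea 8(%rbx,%rax,4), %rcx" with %rcx distinct from
both sources: +1 for the initial mov, +1 to add the index to the base,
+1 for the scale, +1 for the non-zero displacement, -1 for the lea
itself, giving a split cost of 3. */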
15554 if (parts.base || parts.index)
15556 /* Have to use a mov instruction if the non-destructive
15557 destination form is used. */
15558 if (regno1 != regno0 && regno2 != regno0)
15559 split_cost += 1;
15561 /* Have to add index to base if both exist. */
15562 if (parts.base && parts.index)
15563 split_cost += 1;
15565 /* Have to use shift and adds if scale is 2 or greater. */
15566 if (parts.scale > 1)
15568 if (regno0 != regno1)
15569 split_cost += 1;
15570 else if (regno2 == regno0)
15571 split_cost += 4;
15572 else
15573 split_cost += parts.scale;
15576 /* Have to use add instruction with immediate if
15577 disp is non-zero. */
15578 if (parts.disp && parts.disp != const0_rtx)
15579 split_cost += 1;
15581 /* Subtract the price of lea. */
15582 split_cost -= 1;
15585 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
15586 parts.scale > 1);
15589 /* Return true if it is ok to optimize an ADD operation to a LEA
15590 operation to avoid flag register consumption. For most processors,
15591 ADD is faster than LEA. For the processors like BONNELL, if the
15592 destination register of LEA holds an actual address which will be
15593 used soon, LEA is better and otherwise ADD is better. */
15595 bool
15596 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
15598 unsigned int regno0 = true_regnum (operands[0]);
15599 unsigned int regno1 = true_regnum (operands[1]);
15600 unsigned int regno2 = true_regnum (operands[2]);
15602 /* If a = b + c, (a!=b && a!=c), must use lea form. */
15603 if (regno0 != regno1 && regno0 != regno2)
15604 return true;
15606 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15607 return false;
15609 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
15612 /* Return true if destination reg of SET_BODY is shift count of
15613 USE_BODY. */
15615 static bool
15616 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
15618 rtx set_dest;
15619 rtx shift_rtx;
15620 int i;
15622 /* Retrieve destination of SET_BODY. */
15623 switch (GET_CODE (set_body))
15625 case SET:
15626 set_dest = SET_DEST (set_body);
15627 if (!set_dest || !REG_P (set_dest))
15628 return false;
15629 break;
15630 case PARALLEL:
15631 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
15632 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
15633 use_body))
15634 return true;
15635 /* FALLTHROUGH */
15636 default:
15637 return false;
15640 /* Retrieve shift count of USE_BODY. */
15641 switch (GET_CODE (use_body))
15643 case SET:
15644 shift_rtx = XEXP (use_body, 1);
15645 break;
15646 case PARALLEL:
15647 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
15648 if (ix86_dep_by_shift_count_body (set_body,
15649 XVECEXP (use_body, 0, i)))
15650 return true;
15651 /* FALLTHROUGH */
15652 default:
15653 return false;
15656 if (shift_rtx
15657 && (GET_CODE (shift_rtx) == ASHIFT
15658 || GET_CODE (shift_rtx) == LSHIFTRT
15659 || GET_CODE (shift_rtx) == ASHIFTRT
15660 || GET_CODE (shift_rtx) == ROTATE
15661 || GET_CODE (shift_rtx) == ROTATERT))
15663 rtx shift_count = XEXP (shift_rtx, 1);
15665 /* Return true if shift count is dest of SET_BODY. */
15666 if (REG_P (shift_count))
15668 /* Add check since it can be invoked before register
15669 allocation in the pre-reload scheduler. */
15670 if (reload_completed
15671 && true_regnum (set_dest) == true_regnum (shift_count))
15672 return true;
15673 else if (REGNO (set_dest) == REGNO (shift_count))
15674 return true;
15678 return false;
15681 /* Return true if destination reg of SET_INSN is shift count of
15682 USE_INSN. */
15684 bool
15685 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
15687 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
15688 PATTERN (use_insn));
15691 /* Return TRUE or FALSE depending on whether the unary operator meets the
15692 appropriate constraints. */
15694 bool
15695 ix86_unary_operator_ok (enum rtx_code,
15696 machine_mode,
15697 rtx operands[2])
15699 /* If one of operands is memory, source and destination must match. */
15700 if ((MEM_P (operands[0])
15701 || MEM_P (operands[1]))
15702 && ! rtx_equal_p (operands[0], operands[1]))
15703 return false;
15704 return true;
15707 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
15708 are ok, keeping in mind the possible movddup alternative. */
15710 bool
15711 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
15713 if (MEM_P (operands[0]))
15714 return rtx_equal_p (operands[0], operands[1 + high]);
15715 if (MEM_P (operands[1]) && MEM_P (operands[2]))
15716 return false;
15717 return true;
15720 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15721 then replicate the value for all elements of the vector
15722 register. */
15725 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
15727 int i, n_elt;
15728 rtvec v;
15729 machine_mode scalar_mode;
15731 switch (mode)
15733 case E_V64QImode:
15734 case E_V32QImode:
15735 case E_V16QImode:
15736 case E_V32HImode:
15737 case E_V16HImode:
15738 case E_V8HImode:
15739 case E_V16SImode:
15740 case E_V8SImode:
15741 case E_V4SImode:
15742 case E_V2SImode:
15743 case E_V8DImode:
15744 case E_V4DImode:
15745 case E_V2DImode:
15746 gcc_assert (vect);
15747 /* FALLTHRU */
15748 case E_V8HFmode:
15749 case E_V16HFmode:
15750 case E_V32HFmode:
15751 case E_V16SFmode:
15752 case E_V8SFmode:
15753 case E_V4SFmode:
15754 case E_V2SFmode:
15755 case E_V8DFmode:
15756 case E_V4DFmode:
15757 case E_V2DFmode:
15758 n_elt = GET_MODE_NUNITS (mode);
15759 v = rtvec_alloc (n_elt);
15760 scalar_mode = GET_MODE_INNER (mode);
15762 RTVEC_ELT (v, 0) = value;
15764 for (i = 1; i < n_elt; ++i)
15765 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
15767 return gen_rtx_CONST_VECTOR (mode, v);
15769 default:
15770 gcc_unreachable ();
15774 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15775 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15776 for an SSE register. If VECT is true, then replicate the mask for
15777 all elements of the vector register. If INVERT is true, then create
15778 a mask excluding the sign bit. */
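/* For example, for V4SFmode with VECT set this yields a vector whose
four 32-bit elements are each 0x80000000 (or 0x7fffffff when INVERT),
ready for use as an and/andn/xor mask on SSE registers. */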
15781 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
15783 machine_mode vec_mode, imode;
15784 wide_int w;
15785 rtx mask, v;
15787 switch (mode)
15789 case E_V8HFmode:
15790 case E_V16HFmode:
15791 case E_V32HFmode:
15792 vec_mode = mode;
15793 imode = HImode;
15794 break;
15796 case E_V16SImode:
15797 case E_V16SFmode:
15798 case E_V8SImode:
15799 case E_V4SImode:
15800 case E_V8SFmode:
15801 case E_V4SFmode:
15802 case E_V2SFmode:
15803 case E_V2SImode:
15804 vec_mode = mode;
15805 imode = SImode;
15806 break;
15808 case E_V8DImode:
15809 case E_V4DImode:
15810 case E_V2DImode:
15811 case E_V8DFmode:
15812 case E_V4DFmode:
15813 case E_V2DFmode:
15814 vec_mode = mode;
15815 imode = DImode;
15816 break;
15818 case E_TImode:
15819 case E_TFmode:
15820 vec_mode = VOIDmode;
15821 imode = TImode;
15822 break;
15824 default:
15825 gcc_unreachable ();
15828 machine_mode inner_mode = GET_MODE_INNER (mode);
15829 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
15830 GET_MODE_BITSIZE (inner_mode));
15831 if (invert)
15832 w = wi::bit_not (w);
15834 /* Force this value into the low part of a fp vector constant. */
15835 mask = immed_wide_int_const (w, imode);
15836 mask = gen_lowpart (inner_mode, mask);
15838 if (vec_mode == VOIDmode)
15839 return force_reg (inner_mode, mask);
15841 v = ix86_build_const_vector (vec_mode, vect, mask);
15842 return force_reg (vec_mode, v);
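/* Illustrative example (not from the original source): for V4SFmode with
   VECT set and INVERT clear, the constant built above is 0x80000000
   replicated into every 32-bit lane, i.e. { -0.0f, -0.0f, -0.0f, -0.0f };
   with INVERT set it is 0x7fffffff per lane, which the abs expansion ANDs
   against and the neg expansion XORs against.  */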
15845 /* Return HOST_WIDE_INT for const vector OP in MODE. */
15847 HOST_WIDE_INT
15848 ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
15850 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
15851 gcc_unreachable ();
15853 int nunits = GET_MODE_NUNITS (mode);
15854 wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
15855 machine_mode innermode = GET_MODE_INNER (mode);
15856 unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);
15858 switch (mode)
15860 case E_V2QImode:
15861 case E_V4QImode:
15862 case E_V2HImode:
15863 case E_V8QImode:
15864 case E_V4HImode:
15865 case E_V2SImode:
15866 for (int i = 0; i < nunits; ++i)
15868 int v = INTVAL (XVECEXP (op, 0, i));
15869 wide_int wv = wi::shwi (v, innermode_bits);
15870 val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
15872 break;
15873 case E_V2HFmode:
15874 case E_V2BFmode:
15875 case E_V4HFmode:
15876 case E_V4BFmode:
15877 case E_V2SFmode:
15878 for (int i = 0; i < nunits; ++i)
15880 rtx x = XVECEXP (op, 0, i);
15881 int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
15882 REAL_MODE_FORMAT (innermode));
15883 wide_int wv = wi::shwi (v, innermode_bits);
15884 val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
15886 break;
15887 default:
15888 gcc_unreachable ();
15891 return val.to_shwi ();
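/* Worked example (illustrative): for a V4QImode CONST_VECTOR {1, 2, 3, 4},
   each element is inserted at bit offset 8 * i, so the function returns
   0x04030201 as a HOST_WIDE_INT.  */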
15894 /* Return TRUE or FALSE depending on whether the first SET in INSN
 15895    has source and destination with matching CC modes, and whether the
 15896    CC mode is at least as constrained as REQ_MODE.  */
15898 bool
15899 ix86_match_ccmode (rtx insn, machine_mode req_mode)
15901 rtx set;
15902 machine_mode set_mode;
15904 set = PATTERN (insn);
15905 if (GET_CODE (set) == PARALLEL)
15906 set = XVECEXP (set, 0, 0);
15907 gcc_assert (GET_CODE (set) == SET);
15908 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
15910 set_mode = GET_MODE (SET_DEST (set));
15911 switch (set_mode)
15913 case E_CCNOmode:
15914 if (req_mode != CCNOmode
15915 && (req_mode != CCmode
15916 || XEXP (SET_SRC (set), 1) != const0_rtx))
15917 return false;
15918 break;
15919 case E_CCmode:
15920 if (req_mode == CCGCmode)
15921 return false;
15922 /* FALLTHRU */
15923 case E_CCGCmode:
15924 if (req_mode == CCGOCmode || req_mode == CCNOmode)
15925 return false;
15926 /* FALLTHRU */
15927 case E_CCGOCmode:
15928 if (req_mode == CCZmode)
15929 return false;
15930 /* FALLTHRU */
15931 case E_CCZmode:
15932 break;
15934 case E_CCGZmode:
15936 case E_CCAmode:
15937 case E_CCCmode:
15938 case E_CCOmode:
15939 case E_CCPmode:
15940 case E_CCSmode:
15941 if (set_mode != req_mode)
15942 return false;
15943 break;
15945 default:
15946 gcc_unreachable ();
15949 return GET_MODE (SET_SRC (set)) == set_mode;
15952 machine_mode
15953 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
15955 machine_mode mode = GET_MODE (op0);
15957 if (SCALAR_FLOAT_MODE_P (mode))
15959 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15960 return CCFPmode;
15963 switch (code)
15965 /* Only zero flag is needed. */
15966 case EQ: /* ZF=0 */
15967 case NE: /* ZF!=0 */
15968 return CCZmode;
15969 /* Codes needing carry flag. */
15970 case GEU: /* CF=0 */
15971 case LTU: /* CF=1 */
15972 rtx geu;
15973 /* Detect overflow checks. They need just the carry flag. */
15974 if (GET_CODE (op0) == PLUS
15975 && (rtx_equal_p (op1, XEXP (op0, 0))
15976 || rtx_equal_p (op1, XEXP (op0, 1))))
15977 return CCCmode;
15978 /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
15979 Match LTU of op0
15980 (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
15981 and op1
15982 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
15983 where CC_CCC is either CC or CCC. */
15984 else if (code == LTU
15985 && GET_CODE (op0) == NEG
15986 && GET_CODE (geu = XEXP (op0, 0)) == GEU
15987 && REG_P (XEXP (geu, 0))
15988 && (GET_MODE (XEXP (geu, 0)) == CCCmode
15989 || GET_MODE (XEXP (geu, 0)) == CCmode)
15990 && REGNO (XEXP (geu, 0)) == FLAGS_REG
15991 && XEXP (geu, 1) == const0_rtx
15992 && GET_CODE (op1) == LTU
15993 && REG_P (XEXP (op1, 0))
15994 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
15995 && REGNO (XEXP (op1, 0)) == FLAGS_REG
15996 && XEXP (op1, 1) == const0_rtx)
15997 return CCCmode;
15998 /* Similarly for *x86_cmc pattern.
15999 Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
16000 and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
16001 It is sufficient to test that the operand modes are CCCmode. */
16002 else if (code == LTU
16003 && GET_CODE (op0) == NEG
16004 && GET_CODE (XEXP (op0, 0)) == LTU
16005 && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
16006 && GET_CODE (op1) == GEU
16007 && GET_MODE (XEXP (op1, 0)) == CCCmode)
16008 return CCCmode;
16009 else
16010 return CCmode;
16011 case GTU: /* CF=0 & ZF=0 */
16012 case LEU: /* CF=1 | ZF=1 */
16013 return CCmode;
16014 /* Codes possibly doable only with sign flag when
16015 comparing against zero. */
16016 case GE: /* SF=OF or SF=0 */
16017 case LT: /* SF<>OF or SF=1 */
16018 if (op1 == const0_rtx)
16019 return CCGOCmode;
16020 else
16021 /* For other cases Carry flag is not required. */
16022 return CCGCmode;
 16023       /* Codes doable only with the sign flag when comparing
 16024          against zero, but for which there is no jump instruction,
 16025          so we need to use relational tests against overflow,
 16026          which thus needs to be zero.  */
16027 case GT: /* ZF=0 & SF=OF */
16028 case LE: /* ZF=1 | SF<>OF */
16029 if (op1 == const0_rtx)
16030 return CCNOmode;
16031 else
16032 return CCGCmode;
 16033       /* The strcmp pattern does (use flags), and combine may ask us
 16034          for the proper mode.  */
16035 case USE:
16036 return CCmode;
16037 default:
16038 gcc_unreachable ();
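/* Examples (illustrative): EQ/NE need only ZF, so they get CCZmode; GT/LE
   against const0_rtx get CCNOmode; GEU/LTU where OP0 is a PLUS that reuses
   OP1 is an overflow check and gets CCCmode; the remaining unsigned codes
   fall back to full CCmode.  */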
16042 /* Return TRUE or FALSE depending on whether the ptest instruction
16043 INSN has source and destination with suitable matching CC modes. */
16045 bool
16046 ix86_match_ptest_ccmode (rtx insn)
16048 rtx set, src;
16049 machine_mode set_mode;
16051 set = PATTERN (insn);
16052 gcc_assert (GET_CODE (set) == SET);
16053 src = SET_SRC (set);
16054 gcc_assert (GET_CODE (src) == UNSPEC
16055 && XINT (src, 1) == UNSPEC_PTEST);
16057 set_mode = GET_MODE (src);
16058 if (set_mode != CCZmode
16059 && set_mode != CCCmode
16060 && set_mode != CCmode)
16061 return false;
16062 return GET_MODE (SET_DEST (set)) == set_mode;
16065 /* Return the fixed registers used for condition codes. */
16067 static bool
16068 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16070 *p1 = FLAGS_REG;
16071 *p2 = INVALID_REGNUM;
16072 return true;
16075 /* If two condition code modes are compatible, return a condition code
16076 mode which is compatible with both. Otherwise, return
16077 VOIDmode. */
16079 static machine_mode
16080 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
16082 if (m1 == m2)
16083 return m1;
16085 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16086 return VOIDmode;
16088 if ((m1 == CCGCmode && m2 == CCGOCmode)
16089 || (m1 == CCGOCmode && m2 == CCGCmode))
16090 return CCGCmode;
16092 if ((m1 == CCNOmode && m2 == CCGOCmode)
16093 || (m1 == CCGOCmode && m2 == CCNOmode))
16094 return CCNOmode;
16096 if (m1 == CCZmode
16097 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
16098 return m2;
16099 else if (m2 == CCZmode
16100 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
16101 return m1;
16103 switch (m1)
16105 default:
16106 gcc_unreachable ();
16108 case E_CCmode:
16109 case E_CCGCmode:
16110 case E_CCGOCmode:
16111 case E_CCNOmode:
16112 case E_CCAmode:
16113 case E_CCCmode:
16114 case E_CCOmode:
16115 case E_CCPmode:
16116 case E_CCSmode:
16117 case E_CCZmode:
16118 switch (m2)
16120 default:
16121 return VOIDmode;
16123 case E_CCmode:
16124 case E_CCGCmode:
16125 case E_CCGOCmode:
16126 case E_CCNOmode:
16127 case E_CCAmode:
16128 case E_CCCmode:
16129 case E_CCOmode:
16130 case E_CCPmode:
16131 case E_CCSmode:
16132 case E_CCZmode:
16133 return CCmode;
16136 case E_CCFPmode:
16137 /* These are only compatible with themselves, which we already
16138 checked above. */
16139 return VOIDmode;
16143 /* Return strategy to use for floating-point. We assume that fcomi is always
 16144    preferable where available, since that is also true when looking at size
16145 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
16147 enum ix86_fpcmp_strategy
16148 ix86_fp_comparison_strategy (enum rtx_code)
16150 /* Do fcomi/sahf based test when profitable. */
16152 if (TARGET_CMOVE)
16153 return IX86_FPCMP_COMI;
16155 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
16156 return IX86_FPCMP_SAHF;
16158 return IX86_FPCMP_ARITH;
16161 /* Convert comparison codes we use to represent FP comparison to integer
16162 code that will result in proper branch. Return UNKNOWN if no such code
16163 is available. */
16165 enum rtx_code
16166 ix86_fp_compare_code_to_integer (enum rtx_code code)
16168 switch (code)
16170 case GT:
16171 return GTU;
16172 case GE:
16173 return GEU;
16174 case ORDERED:
16175 case UNORDERED:
16176 return code;
16177 case UNEQ:
16178 return EQ;
16179 case UNLT:
16180 return LTU;
16181 case UNLE:
16182 return LEU;
16183 case LTGT:
16184 return NE;
16185 default:
16186 return UNKNOWN;
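/* For instance (illustrative): after a comi/ucomi style compare the FP
   relation lives in ZF/CF, so GT is tested with the unsigned "ja" (GTU)
   and GE with "jae" (GEU), which is why the mapping above goes to the
   unsigned integer codes.  */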
 16190 /* Zero-extend EXP, which may be in SImode, to a Pmode register.  */
16192 ix86_zero_extend_to_Pmode (rtx exp)
16194 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
16197 /* Return true if the function is called via PLT. */
16199 bool
16200 ix86_call_use_plt_p (rtx call_op)
16202 if (SYMBOL_REF_LOCAL_P (call_op))
16204 if (SYMBOL_REF_DECL (call_op)
16205 && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL)
16207 /* NB: All ifunc functions must be called via PLT. */
16208 cgraph_node *node
16209 = cgraph_node::get (SYMBOL_REF_DECL (call_op));
16210 if (node && node->ifunc_resolver)
16211 return true;
16213 return false;
16215 return true;
16218 /* Implement TARGET_IFUNC_REF_LOCAL_OK. If this hook returns true,
16219 the PLT entry will be used as the function address for local IFUNC
16220 functions. When the PIC register is needed for PLT call, indirect
16221 call via the PLT entry will fail since the PIC register may not be
16222 set up properly for indirect call. In this case, we should return
16223 false. */
16225 static bool
16226 ix86_ifunc_ref_local_ok (void)
16228 return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC);
16231 /* Return true if the function being called was marked with attribute
16232 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
16233 to handle the non-PIC case in the backend because there is no easy
16234 interface for the front-end to force non-PLT calls to use the GOT.
16235 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
16236 to call the function marked "noplt" indirectly. */
16238 static bool
16239 ix86_nopic_noplt_attribute_p (rtx call_op)
16241 if (flag_pic || ix86_cmodel == CM_LARGE
16242 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
16243 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
16244 || SYMBOL_REF_LOCAL_P (call_op))
16245 return false;
16247 tree symbol_decl = SYMBOL_REF_DECL (call_op);
16249 if (!flag_plt
16250 || (symbol_decl != NULL_TREE
16251 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
16252 return true;
16254 return false;
16257 /* Helper to output the jmp/call. */
16258 static void
16259 ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
16261 if (thunk_name != NULL)
16263 if (REX_INT_REGNO_P (regno)
16264 && ix86_indirect_branch_cs_prefix)
16265 fprintf (asm_out_file, "\tcs\n");
16266 fprintf (asm_out_file, "\tjmp\t");
16267 assemble_name (asm_out_file, thunk_name);
16268 putc ('\n', asm_out_file);
16269 if ((ix86_harden_sls & harden_sls_indirect_jmp))
16270 fputs ("\tint3\n", asm_out_file);
16272 else
16273 output_indirect_thunk (regno);
16276 /* Output indirect branch via a call and return thunk. CALL_OP is a
16277 register which contains the branch target. XASM is the assembly
16278 template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
16279 A normal call is converted to:
16281 call __x86_indirect_thunk_reg
16283 and a tail call is converted to:
16285 jmp __x86_indirect_thunk_reg
16288 static void
16289 ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
16291 char thunk_name_buf[32];
16292 char *thunk_name;
16293 enum indirect_thunk_prefix need_prefix
16294 = indirect_thunk_need_prefix (current_output_insn);
16295 int regno = REGNO (call_op);
16297 if (cfun->machine->indirect_branch_type
16298 != indirect_branch_thunk_inline)
16300 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
16301 SET_HARD_REG_BIT (indirect_thunks_used, regno);
16303 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
16304 thunk_name = thunk_name_buf;
16306 else
16307 thunk_name = NULL;
16309 if (sibcall_p)
16310 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16311 else
16313 if (thunk_name != NULL)
16315 if (REX_INT_REGNO_P (regno)
16316 && ix86_indirect_branch_cs_prefix)
16317 fprintf (asm_out_file, "\tcs\n");
16318 fprintf (asm_out_file, "\tcall\t");
16319 assemble_name (asm_out_file, thunk_name);
16320 putc ('\n', asm_out_file);
16321 return;
16324 char indirectlabel1[32];
16325 char indirectlabel2[32];
16327 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
16328 INDIRECT_LABEL,
16329 indirectlabelno++);
16330 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
16331 INDIRECT_LABEL,
16332 indirectlabelno++);
16334 /* Jump. */
16335 fputs ("\tjmp\t", asm_out_file);
16336 assemble_name_raw (asm_out_file, indirectlabel2);
16337 fputc ('\n', asm_out_file);
16339 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
16341 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16343 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
16345 /* Call. */
16346 fputs ("\tcall\t", asm_out_file);
16347 assemble_name_raw (asm_out_file, indirectlabel1);
16348 fputc ('\n', asm_out_file);
16352 /* Output indirect branch via a call and return thunk. CALL_OP is
16353 the branch target. XASM is the assembly template for CALL_OP.
16354 Branch is a tail call if SIBCALL_P is true. A normal call is
16355 converted to:
 16357 	jmp L2
 16358    L1:
 16359 	push CALL_OP
 16360 	jmp __x86_indirect_thunk
 16361    L2:
 16362 	call L1
16364 and a tail call is converted to:
16366 push CALL_OP
16367 jmp __x86_indirect_thunk
16370 static void
16371 ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
16372 bool sibcall_p)
16374 char thunk_name_buf[32];
16375 char *thunk_name;
16376 char push_buf[64];
16377 enum indirect_thunk_prefix need_prefix
16378 = indirect_thunk_need_prefix (current_output_insn);
16379 int regno = -1;
16381 if (cfun->machine->indirect_branch_type
16382 != indirect_branch_thunk_inline)
16384 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
16385 indirect_thunk_needed = true;
16386 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
16387 thunk_name = thunk_name_buf;
16389 else
16390 thunk_name = NULL;
16392 snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
16393 TARGET_64BIT ? 'q' : 'l', xasm);
16395 if (sibcall_p)
16397 output_asm_insn (push_buf, &call_op);
16398 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16400 else
16402 char indirectlabel1[32];
16403 char indirectlabel2[32];
16405 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
16406 INDIRECT_LABEL,
16407 indirectlabelno++);
16408 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
16409 INDIRECT_LABEL,
16410 indirectlabelno++);
16412 /* Jump. */
16413 fputs ("\tjmp\t", asm_out_file);
16414 assemble_name_raw (asm_out_file, indirectlabel2);
16415 fputc ('\n', asm_out_file);
16417 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
16419 /* An external function may be called via GOT, instead of PLT. */
16420 if (MEM_P (call_op))
16422 struct ix86_address parts;
16423 rtx addr = XEXP (call_op, 0);
16424 if (ix86_decompose_address (addr, &parts)
16425 && parts.base == stack_pointer_rtx)
16427 /* Since call will adjust stack by -UNITS_PER_WORD,
16428 we must convert "disp(stack, index, scale)" to
16429 "disp+UNITS_PER_WORD(stack, index, scale)". */
16430 if (parts.index)
16432 addr = gen_rtx_MULT (Pmode, parts.index,
16433 GEN_INT (parts.scale));
16434 addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16435 addr);
16437 else
16438 addr = stack_pointer_rtx;
16440 rtx disp;
16441 if (parts.disp != NULL_RTX)
16442 disp = plus_constant (Pmode, parts.disp,
16443 UNITS_PER_WORD);
16444 else
16445 disp = GEN_INT (UNITS_PER_WORD);
16447 addr = gen_rtx_PLUS (Pmode, addr, disp);
16448 call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
16452 output_asm_insn (push_buf, &call_op);
16454 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16456 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
16458 /* Call. */
16459 fputs ("\tcall\t", asm_out_file);
16460 assemble_name_raw (asm_out_file, indirectlabel1);
16461 fputc ('\n', asm_out_file);
16465 /* Output indirect branch via a call and return thunk. CALL_OP is
16466 the branch target. XASM is the assembly template for CALL_OP.
16467 Branch is a tail call if SIBCALL_P is true. */
16469 static void
16470 ix86_output_indirect_branch (rtx call_op, const char *xasm,
16471 bool sibcall_p)
16473 if (REG_P (call_op))
16474 ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
16475 else
16476 ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
16479 /* Output indirect jump. CALL_OP is the jump target. */
16481 const char *
16482 ix86_output_indirect_jmp (rtx call_op)
16484 if (cfun->machine->indirect_branch_type != indirect_branch_keep)
 16486       /* We can't use the red zone since "call" in the indirect thunk
 16487          pushes the return address onto the stack, destroying the red zone.  */
16488 if (ix86_red_zone_used)
16489 gcc_unreachable ();
16491 ix86_output_indirect_branch (call_op, "%0", true);
16493 else
16494 output_asm_insn ("%!jmp\t%A0", &call_op);
16495 return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "";
16498 /* Output return instrumentation for current function if needed. */
16500 static void
16501 output_return_instrumentation (void)
16503 if (ix86_instrument_return != instrument_return_none
16504 && flag_fentry
16505 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
16507 if (ix86_flag_record_return)
16508 fprintf (asm_out_file, "1:\n");
16509 switch (ix86_instrument_return)
16511 case instrument_return_call:
16512 fprintf (asm_out_file, "\tcall\t__return__\n");
16513 break;
16514 case instrument_return_nop5:
16515 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
16516 fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
16517 break;
16518 case instrument_return_none:
16519 break;
16522 if (ix86_flag_record_return)
16524 fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
16525 fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
16526 fprintf (asm_out_file, "\t.previous\n");
 16531 /* Output function return.  Add a REP prefix to RET if LONG_P is true
 16532    and function return is kept.  */
16534 const char *
16535 ix86_output_function_return (bool long_p)
16537 output_return_instrumentation ();
16539 if (cfun->machine->function_return_type != indirect_branch_keep)
16541 char thunk_name[32];
16542 enum indirect_thunk_prefix need_prefix
16543 = indirect_thunk_need_prefix (current_output_insn);
16545 if (cfun->machine->function_return_type
16546 != indirect_branch_thunk_inline)
16548 bool need_thunk = (cfun->machine->function_return_type
16549 == indirect_branch_thunk);
16550 indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
16551 true);
16552 indirect_return_needed |= need_thunk;
16553 fprintf (asm_out_file, "\tjmp\t");
16554 assemble_name (asm_out_file, thunk_name);
16555 putc ('\n', asm_out_file);
16557 else
16558 output_indirect_thunk (INVALID_REGNUM);
16560 return "";
16563 output_asm_insn (long_p ? "rep%; ret" : "ret", nullptr);
16564 return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
16567 /* Output indirect function return. RET_OP is the function return
16568 target. */
16570 const char *
16571 ix86_output_indirect_function_return (rtx ret_op)
16573 if (cfun->machine->function_return_type != indirect_branch_keep)
16575 char thunk_name[32];
16576 enum indirect_thunk_prefix need_prefix
16577 = indirect_thunk_need_prefix (current_output_insn);
16578 unsigned int regno = REGNO (ret_op);
16579 gcc_assert (regno == CX_REG);
16581 if (cfun->machine->function_return_type
16582 != indirect_branch_thunk_inline)
16584 bool need_thunk = (cfun->machine->function_return_type
16585 == indirect_branch_thunk);
16586 indirect_thunk_name (thunk_name, regno, need_prefix, true);
16588 if (need_thunk)
16590 indirect_return_via_cx = true;
16591 SET_HARD_REG_BIT (indirect_thunks_used, CX_REG);
16593 fprintf (asm_out_file, "\tjmp\t");
16594 assemble_name (asm_out_file, thunk_name);
16595 putc ('\n', asm_out_file);
16597 else
16598 output_indirect_thunk (regno);
16600 else
16602 output_asm_insn ("%!jmp\t%A0", &ret_op);
16603 if (ix86_harden_sls & harden_sls_indirect_jmp)
16604 fputs ("\tint3\n", asm_out_file);
16606 return "";
16609 /* Output the assembly for a call instruction. */
16611 const char *
16612 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
16614 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
16615 bool output_indirect_p
16616 = (!TARGET_SEH
16617 && cfun->machine->indirect_branch_type != indirect_branch_keep);
16618 bool seh_nop_p = false;
16619 const char *xasm;
16621 if (SIBLING_CALL_P (insn))
16623 output_return_instrumentation ();
16624 if (direct_p)
16626 if (ix86_nopic_noplt_attribute_p (call_op))
16628 direct_p = false;
16629 if (TARGET_64BIT)
16631 if (output_indirect_p)
16632 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16633 else
16634 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16636 else
16638 if (output_indirect_p)
16639 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
16640 else
16641 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
16644 else
16645 xasm = "%!jmp\t%P0";
16647 /* SEH epilogue detection requires the indirect branch case
16648 to include REX.W. */
16649 else if (TARGET_SEH)
16650 xasm = "%!rex.W jmp\t%A0";
16651 else
16653 if (output_indirect_p)
16654 xasm = "%0";
16655 else
16656 xasm = "%!jmp\t%A0";
16659 if (output_indirect_p && !direct_p)
16660 ix86_output_indirect_branch (call_op, xasm, true);
16661 else
16663 output_asm_insn (xasm, &call_op);
16664 if (!direct_p
16665 && (ix86_harden_sls & harden_sls_indirect_jmp))
16666 return "int3";
16668 return "";
16671 /* SEH unwinding can require an extra nop to be emitted in several
16672 circumstances. Determine if we have one of those. */
16673 if (TARGET_SEH)
16675 rtx_insn *i;
16677 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
16679 /* Prevent a catch region from being adjacent to a jump that would
16680 be interpreted as an epilogue sequence by the unwinder. */
16681 if (JUMP_P(i) && CROSSING_JUMP_P (i))
16683 seh_nop_p = true;
16684 break;
16687 /* If we get to another real insn, we don't need the nop. */
16688 if (INSN_P (i))
16689 break;
16691 /* If we get to the epilogue note, prevent a catch region from
16692 being adjacent to the standard epilogue sequence. Note that,
16693 if non-call exceptions are enabled, we already did it during
16694 epilogue expansion, or else, if the insn can throw internally,
16695 we already did it during the reorg pass. */
16696 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
16697 && !flag_non_call_exceptions
16698 && !can_throw_internal (insn))
16700 seh_nop_p = true;
16701 break;
16705 /* If we didn't find a real insn following the call, prevent the
16706 unwinder from looking into the next function. */
16707 if (i == NULL)
16708 seh_nop_p = true;
16711 if (direct_p)
16713 if (ix86_nopic_noplt_attribute_p (call_op))
16715 direct_p = false;
16716 if (TARGET_64BIT)
16718 if (output_indirect_p)
16719 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16720 else
16721 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16723 else
16725 if (output_indirect_p)
16726 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
16727 else
16728 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
16731 else
16732 xasm = "%!call\t%P0";
16734 else
16736 if (output_indirect_p)
16737 xasm = "%0";
16738 else
16739 xasm = "%!call\t%A0";
16742 if (output_indirect_p && !direct_p)
16743 ix86_output_indirect_branch (call_op, xasm, false);
16744 else
16745 output_asm_insn (xasm, &call_op);
16747 if (seh_nop_p)
16748 return "nop";
16750 return "";
16753 /* Return a MEM corresponding to a stack slot with mode MODE.
16754 Allocate a new slot if necessary.
16756 The RTL for a function can have several slots available: N is
16757 which slot to use. */
16760 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
16762 struct stack_local_entry *s;
16764 gcc_assert (n < MAX_386_STACK_LOCALS);
16766 for (s = ix86_stack_locals; s; s = s->next)
16767 if (s->mode == mode && s->n == n)
16768 return validize_mem (copy_rtx (s->rtl));
16770 int align = 0;
16771 /* For DImode with SLOT_FLOATxFDI_387 use 32-bit
16772 alignment with -m32 -mpreferred-stack-boundary=2. */
16773 if (mode == DImode
16774 && !TARGET_64BIT
16775 && n == SLOT_FLOATxFDI_387
16776 && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode))
16777 align = 32;
16778 s = ggc_alloc<stack_local_entry> ();
16779 s->n = n;
16780 s->mode = mode;
16781 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align);
16783 s->next = ix86_stack_locals;
16784 ix86_stack_locals = s;
16785 return validize_mem (copy_rtx (s->rtl));
16788 static void
16789 ix86_instantiate_decls (void)
16791 struct stack_local_entry *s;
16793 for (s = ix86_stack_locals; s; s = s->next)
16794 if (s->rtl != NULL_RTX)
16795 instantiate_decl_rtl (s->rtl);
16798 /* Check whether x86 address PARTS is a pc-relative address. */
16800 bool
16801 ix86_rip_relative_addr_p (struct ix86_address *parts)
16803 rtx base, index, disp;
16805 base = parts->base;
16806 index = parts->index;
16807 disp = parts->disp;
16809 if (disp && !base && !index)
16811 if (TARGET_64BIT)
16813 rtx symbol = disp;
16815 if (GET_CODE (disp) == CONST)
16816 symbol = XEXP (disp, 0);
16817 if (GET_CODE (symbol) == PLUS
16818 && CONST_INT_P (XEXP (symbol, 1)))
16819 symbol = XEXP (symbol, 0);
16821 if (GET_CODE (symbol) == LABEL_REF
16822 || (GET_CODE (symbol) == SYMBOL_REF
16823 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
16824 || (GET_CODE (symbol) == UNSPEC
16825 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
16826 || XINT (symbol, 1) == UNSPEC_PCREL
16827 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
16828 return true;
16831 return false;
16834 /* Calculate the length of the memory address in the instruction encoding.
16835 Includes addr32 prefix, does not include the one-byte modrm, opcode,
16836 or other prefixes. We never generate addr32 prefix for LEA insn. */
16839 memory_address_length (rtx addr, bool lea)
16841 struct ix86_address parts;
16842 rtx base, index, disp;
16843 int len;
16844 int ok;
16846 if (GET_CODE (addr) == PRE_DEC
16847 || GET_CODE (addr) == POST_INC
16848 || GET_CODE (addr) == PRE_MODIFY
16849 || GET_CODE (addr) == POST_MODIFY)
16850 return 0;
16852 ok = ix86_decompose_address (addr, &parts);
16853 gcc_assert (ok);
16855 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
16857 /* If this is not LEA instruction, add the length of addr32 prefix. */
16858 if (TARGET_64BIT && !lea
16859 && (SImode_address_operand (addr, VOIDmode)
16860 || (parts.base && GET_MODE (parts.base) == SImode)
16861 || (parts.index && GET_MODE (parts.index) == SImode)))
16862 len++;
16864 base = parts.base;
16865 index = parts.index;
16866 disp = parts.disp;
16868 if (base && SUBREG_P (base))
16869 base = SUBREG_REG (base);
16870 if (index && SUBREG_P (index))
16871 index = SUBREG_REG (index);
16873 gcc_assert (base == NULL_RTX || REG_P (base));
16874 gcc_assert (index == NULL_RTX || REG_P (index));
16876 /* Rule of thumb:
16877 - esp as the base always wants an index,
16878 - ebp as the base always wants a displacement,
16879 - r12 as the base always wants an index,
16880 - r13 as the base always wants a displacement. */
16882 /* Register Indirect. */
16883 if (base && !index && !disp)
16885 /* esp (for its index) and ebp (for its displacement) need
16886 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
16887 code. */
16888 if (base == arg_pointer_rtx
16889 || base == frame_pointer_rtx
16890 || REGNO (base) == SP_REG
16891 || REGNO (base) == BP_REG
16892 || REGNO (base) == R12_REG
16893 || REGNO (base) == R13_REG)
16894 len++;
16897 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
16898 is not disp32, but disp32(%rip), so for disp32
16899 SIB byte is needed, unless print_operand_address
16900 optimizes it into disp32(%rip) or (%rip) is implied
16901 by UNSPEC. */
16902 else if (disp && !base && !index)
16904 len += 4;
16905 if (!ix86_rip_relative_addr_p (&parts))
16906 len++;
16908 else
16910 /* Find the length of the displacement constant. */
16911 if (disp)
16913 if (base && satisfies_constraint_K (disp))
16914 len += 1;
16915 else
16916 len += 4;
16918 /* ebp always wants a displacement. Similarly r13. */
16919 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
16920 len++;
16922 /* An index requires the two-byte modrm form.... */
16923 if (index
16924 /* ...like esp (or r12), which always wants an index. */
16925 || base == arg_pointer_rtx
16926 || base == frame_pointer_rtx
16927 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
16928 len++;
16931 return len;
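/* Rough examples (illustrative), counting only the bytes this function
   accounts for: "(%rax)" -> 0, "8(%rbp)" -> 1 (disp8), "(%rsp)" -> 1
   (SIB byte), "sym(%rip)" -> 4 (disp32), and a plain 64-bit absolute
   disp32 -> 5 (disp32 plus SIB).  */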
16934 /* Compute default value for "length_immediate" attribute. When SHORTFORM
 16935    is set, expect that the insn has an 8-bit immediate alternative.  */
16937 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
16939 int len = 0;
16940 int i;
16941 extract_insn_cached (insn);
16942 for (i = recog_data.n_operands - 1; i >= 0; --i)
16943 if (CONSTANT_P (recog_data.operand[i]))
16945 enum attr_mode mode = get_attr_mode (insn);
16947 gcc_assert (!len);
16948 if (shortform && CONST_INT_P (recog_data.operand[i]))
16950 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
16951 switch (mode)
16953 case MODE_QI:
16954 len = 1;
16955 continue;
16956 case MODE_HI:
16957 ival = trunc_int_for_mode (ival, HImode);
16958 break;
16959 case MODE_SI:
16960 ival = trunc_int_for_mode (ival, SImode);
16961 break;
16962 default:
16963 break;
16965 if (IN_RANGE (ival, -128, 127))
16967 len = 1;
16968 continue;
16971 switch (mode)
16973 case MODE_QI:
16974 len = 1;
16975 break;
16976 case MODE_HI:
16977 len = 2;
16978 break;
16979 case MODE_SI:
16980 len = 4;
16981 break;
16982 /* Immediates for DImode instructions are encoded
16983 as 32bit sign extended values. */
16984 case MODE_DI:
16985 len = 4;
16986 break;
16987 default:
16988 fatal_insn ("unknown insn mode", insn);
16991 return len;
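/* For example (illustrative): with SHORTFORM set, "add $5, %eax" has an
   imm8 alternative so this returns 1, while "add $300, %eax" needs a full
   SImode immediate and returns 4.  */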
16994 /* Compute default value for "length_address" attribute. */
16996 ix86_attr_length_address_default (rtx_insn *insn)
16998 int i;
17000 if (get_attr_type (insn) == TYPE_LEA)
17002 rtx set = PATTERN (insn), addr;
17004 if (GET_CODE (set) == PARALLEL)
17005 set = XVECEXP (set, 0, 0);
17007 gcc_assert (GET_CODE (set) == SET);
17009 addr = SET_SRC (set);
17011 return memory_address_length (addr, true);
17014 extract_insn_cached (insn);
17015 for (i = recog_data.n_operands - 1; i >= 0; --i)
17017 rtx op = recog_data.operand[i];
17018 if (MEM_P (op))
17020 constrain_operands_cached (insn, reload_completed);
17021 if (which_alternative != -1)
17023 const char *constraints = recog_data.constraints[i];
17024 int alt = which_alternative;
17026 while (*constraints == '=' || *constraints == '+')
17027 constraints++;
17028 while (alt-- > 0)
17029 while (*constraints++ != ',')
17031 /* Skip ignored operands. */
17032 if (*constraints == 'X')
17033 continue;
17036 int len = memory_address_length (XEXP (op, 0), false);
17038 /* Account for segment prefix for non-default addr spaces. */
17039 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
17040 len++;
17042 return len;
17045 return 0;
17048 /* Compute default value for "length_vex" attribute. It includes
17049 2 or 3 byte VEX prefix and 1 opcode byte. */
17052 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
17053 bool has_vex_w)
17055 int i, reg_only = 2 + 1;
17056 bool has_mem = false;
 17058   /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX.W bit needs
 17059      the 3-byte VEX prefix.  */
17060 if (!has_0f_opcode || has_vex_w)
17061 return 3 + 1;
 17063   /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
17064 if (!TARGET_64BIT)
17065 return 2 + 1;
17067 extract_insn_cached (insn);
17069 for (i = recog_data.n_operands - 1; i >= 0; --i)
17070 if (REG_P (recog_data.operand[i]))
17072 /* REX.W bit uses 3 byte VEX prefix. */
17073 if (GET_MODE (recog_data.operand[i]) == DImode
17074 && GENERAL_REG_P (recog_data.operand[i]))
17075 return 3 + 1;
17077 /* REX.B bit requires 3-byte VEX. Right here we don't know which
17078 operand will be encoded using VEX.B, so be conservative. */
17079 if (REX_INT_REGNO_P (recog_data.operand[i])
17080 || REX_SSE_REGNO_P (recog_data.operand[i]))
17081 reg_only = 3 + 1;
17083 else if (MEM_P (recog_data.operand[i]))
17085 /* REX.X or REX.B bits use 3 byte VEX prefix. */
17086 if (x86_extended_reg_mentioned_p (recog_data.operand[i]))
17087 return 3 + 1;
17089 has_mem = true;
17092 return has_mem ? 2 + 1 : reg_only;
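/* Illustrative: a 0f-opcode VEX insn using only xmm0-xmm7 and no DImode
   general registers is assumed to fit the 2-byte VEX prefix (2 + 1 opcode
   byte = 3); VEX.W, a DImode general register, or an extended register
   inside a memory operand forces the 3-byte form (3 + 1 = 4).  */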
17096 static bool
17097 ix86_class_likely_spilled_p (reg_class_t);
 17099 /* Return true if the lhs of INSN is a HW function argument register; set
 17100    *IS_SPILLED to true if it is a likely-spilled HW register.  */
17101 static bool
17102 insn_is_function_arg (rtx insn, bool* is_spilled)
17104 rtx dst;
17106 if (!NONDEBUG_INSN_P (insn))
17107 return false;
 17108   /* Call instructions are not movable; ignore them.  */
17109 if (CALL_P (insn))
17110 return false;
17111 insn = PATTERN (insn);
17112 if (GET_CODE (insn) == PARALLEL)
17113 insn = XVECEXP (insn, 0, 0);
17114 if (GET_CODE (insn) != SET)
17115 return false;
17116 dst = SET_DEST (insn);
17117 if (REG_P (dst) && HARD_REGISTER_P (dst)
17118 && ix86_function_arg_regno_p (REGNO (dst)))
17120 /* Is it likely spilled HW register? */
17121 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
17122 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
17123 *is_spilled = true;
17124 return true;
17126 return false;
 17129 /* Add output dependencies for a chain of adjacent function arguments, but only
 17130    if there is a move to a likely-spilled HW register.  Return the first argument
 17131    if at least one dependence was added, or NULL otherwise.  */
17132 static rtx_insn *
17133 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
17135 rtx_insn *insn;
17136 rtx_insn *last = call;
17137 rtx_insn *first_arg = NULL;
17138 bool is_spilled = false;
17140 head = PREV_INSN (head);
 17142   /* Find the argument-passing instruction nearest to the call.  */
17143 while (true)
17145 last = PREV_INSN (last);
17146 if (last == head)
17147 return NULL;
17148 if (!NONDEBUG_INSN_P (last))
17149 continue;
17150 if (insn_is_function_arg (last, &is_spilled))
17151 break;
17152 return NULL;
17155 first_arg = last;
17156 while (true)
17158 insn = PREV_INSN (last);
17159 if (!INSN_P (insn))
17160 break;
17161 if (insn == head)
17162 break;
17163 if (!NONDEBUG_INSN_P (insn))
17165 last = insn;
17166 continue;
17168 if (insn_is_function_arg (insn, &is_spilled))
 17170 	  /* Add an output dependence between two function arguments if the chain
 17171 	     of output arguments contains likely-spilled HW registers.  */
17172 if (is_spilled)
17173 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
17174 first_arg = last = insn;
17176 else
17177 break;
17179 if (!is_spilled)
17180 return NULL;
17181 return first_arg;
17184 /* Add output or anti dependency from insn to first_arg to restrict its code
17185 motion. */
17186 static void
17187 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
17189 rtx set;
17190 rtx tmp;
17192 set = single_set (insn);
17193 if (!set)
17194 return;
17195 tmp = SET_DEST (set);
17196 if (REG_P (tmp))
17198 /* Add output dependency to the first function argument. */
17199 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
17200 return;
17202 /* Add anti dependency. */
17203 add_dependence (first_arg, insn, REG_DEP_ANTI);
 17206 /* Avoid cross-block motion of a function argument by adding a dependency
 17207    from the first non-jump instruction in BB.  */
17208 static void
17209 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
17211 rtx_insn *insn = BB_END (bb);
17213 while (insn)
17215 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
17217 rtx set = single_set (insn);
17218 if (set)
17220 avoid_func_arg_motion (arg, insn);
17221 return;
17224 if (insn == BB_HEAD (bb))
17225 return;
17226 insn = PREV_INSN (insn);
17230 /* Hook for pre-reload schedule - avoid motion of function arguments
17231 passed in likely spilled HW registers. */
17232 static void
17233 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
17235 rtx_insn *insn;
17236 rtx_insn *first_arg = NULL;
17237 if (reload_completed)
17238 return;
17239 while (head != tail && DEBUG_INSN_P (head))
17240 head = NEXT_INSN (head);
17241 for (insn = tail; insn != head; insn = PREV_INSN (insn))
17242 if (INSN_P (insn) && CALL_P (insn))
17244 first_arg = add_parameter_dependencies (insn, head);
17245 if (first_arg)
 17247 	/* Add a dependee for the first argument to predecessors, but only if
 17248 	   the region contains more than one block.  */
17249 basic_block bb = BLOCK_FOR_INSN (insn);
17250 int rgn = CONTAINING_RGN (bb->index);
17251 int nr_blks = RGN_NR_BLOCKS (rgn);
17252 /* Skip trivial regions and region head blocks that can have
17253 predecessors outside of region. */
17254 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
17256 edge e;
17257 edge_iterator ei;
17259 /* Regions are SCCs with the exception of selective
17260 scheduling with pipelining of outer blocks enabled.
17261 So also check that immediate predecessors of a non-head
17262 block are in the same region. */
17263 FOR_EACH_EDGE (e, ei, bb->preds)
 17265 	      /* Avoid creating loop-carried dependencies by using the
 17266 		 topological ordering in the region.  */
17267 if (rgn == CONTAINING_RGN (e->src->index)
17268 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
17269 add_dependee_for_func_arg (first_arg, e->src);
17272 insn = first_arg;
17273 if (insn == head)
17274 break;
17277 else if (first_arg)
17278 avoid_func_arg_motion (first_arg, insn);
17281 /* Hook for pre-reload schedule - set priority of moves from likely spilled
 17282    HW registers to maximum, to schedule them as soon as possible.  These are
17283 moves from function argument registers at the top of the function entry
17284 and moves from function return value registers after call. */
17285 static int
17286 ix86_adjust_priority (rtx_insn *insn, int priority)
17288 rtx set;
17290 if (reload_completed)
17291 return priority;
17293 if (!NONDEBUG_INSN_P (insn))
17294 return priority;
17296 set = single_set (insn);
17297 if (set)
17299 rtx tmp = SET_SRC (set);
17300 if (REG_P (tmp)
17301 && HARD_REGISTER_P (tmp)
17302 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
17303 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
17304 return current_sched_info->sched_max_insns_priority;
17307 return priority;
17310 /* Prepare for scheduling pass. */
17311 static void
17312 ix86_sched_init_global (FILE *, int, int)
17314 /* Install scheduling hooks for current CPU. Some of these hooks are used
17315 in time-critical parts of the scheduler, so we only set them up when
17316 they are actually used. */
17317 switch (ix86_tune)
17319 case PROCESSOR_CORE2:
17320 case PROCESSOR_NEHALEM:
17321 case PROCESSOR_SANDYBRIDGE:
17322 case PROCESSOR_HASWELL:
17323 case PROCESSOR_TREMONT:
17324 case PROCESSOR_ALDERLAKE:
17325 case PROCESSOR_GENERIC:
17326 /* Do not perform multipass scheduling for pre-reload schedule
17327 to save compile time. */
17328 if (reload_completed)
17330 ix86_core2i7_init_hooks ();
17331 break;
17333 /* Fall through. */
17334 default:
17335 targetm.sched.dfa_post_advance_cycle = NULL;
17336 targetm.sched.first_cycle_multipass_init = NULL;
17337 targetm.sched.first_cycle_multipass_begin = NULL;
17338 targetm.sched.first_cycle_multipass_issue = NULL;
17339 targetm.sched.first_cycle_multipass_backtrack = NULL;
17340 targetm.sched.first_cycle_multipass_end = NULL;
17341 targetm.sched.first_cycle_multipass_fini = NULL;
17342 break;
17347 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
17349 static HOST_WIDE_INT
17350 ix86_static_rtx_alignment (machine_mode mode)
17352 if (mode == DFmode)
17353 return 64;
17354 if (ALIGN_MODE_128 (mode))
17355 return MAX (128, GET_MODE_ALIGNMENT (mode));
17356 return GET_MODE_ALIGNMENT (mode);
17359 /* Implement TARGET_CONSTANT_ALIGNMENT. */
17361 static HOST_WIDE_INT
17362 ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
17364 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
17365 || TREE_CODE (exp) == INTEGER_CST)
17367 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
17368 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
17369 return MAX (mode_align, align);
17371 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
17372 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
17373 return BITS_PER_WORD;
17375 return align;
17378 /* Implement TARGET_EMPTY_RECORD_P. */
17380 static bool
17381 ix86_is_empty_record (const_tree type)
17383 if (!TARGET_64BIT)
17384 return false;
17385 return default_is_empty_record (type);
17388 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
17390 static void
17391 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
17393 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
17395 if (!cum->warn_empty)
17396 return;
17398 if (!TYPE_EMPTY_P (type))
17399 return;
17401 /* Don't warn if the function isn't visible outside of the TU. */
17402 if (cum->decl && !TREE_PUBLIC (cum->decl))
17403 return;
17405 const_tree ctx = get_ultimate_context (cum->decl);
17406 if (ctx != NULL_TREE
17407 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
17408 return;
17410 /* If the actual size of the type is zero, then there is no change
17411 in how objects of this size are passed. */
17412 if (int_size_in_bytes (type) == 0)
17413 return;
17415 warning (OPT_Wabi, "empty class %qT parameter passing ABI "
17416 "changes in %<-fabi-version=12%> (GCC 8)", type);
17418 /* Only warn once. */
17419 cum->warn_empty = false;
17422 /* This hook returns name of multilib ABI. */
17424 static const char *
17425 ix86_get_multilib_abi_name (void)
17427 if (!(TARGET_64BIT_P (ix86_isa_flags)))
17428 return "i386";
17429 else if (TARGET_X32_P (ix86_isa_flags))
17430 return "x32";
17431 else
17432 return "x86_64";
17435 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
17436 the data type, and ALIGN is the alignment that the object would
17437 ordinarily have. */
17439 static int
17440 iamcu_alignment (tree type, int align)
17442 machine_mode mode;
17444 if (align < 32 || TYPE_USER_ALIGN (type))
17445 return align;
17447 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
17448 bytes. */
17449 type = strip_array_types (type);
17450 if (TYPE_ATOMIC (type))
17451 return align;
17453 mode = TYPE_MODE (type);
17454 switch (GET_MODE_CLASS (mode))
17456 case MODE_INT:
17457 case MODE_COMPLEX_INT:
17458 case MODE_COMPLEX_FLOAT:
17459 case MODE_FLOAT:
17460 case MODE_DECIMAL_FLOAT:
17461 return 32;
17462 default:
17463 return align;
17467 /* Compute the alignment for a static variable.
17468 TYPE is the data type, and ALIGN is the alignment that
17469 the object would ordinarily have. The value of this function is used
17470 instead of that alignment to align the object. */
17473 ix86_data_alignment (tree type, unsigned int align, bool opt)
17475 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
17476 for symbols from other compilation units or symbols that don't need
17477 to bind locally. In order to preserve some ABI compatibility with
17478 those compilers, ensure we don't decrease alignment from what we
17479 used to assume. */
17481 unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
 17483   /* A data structure equal to or greater than the size of a cache line
17484 (64 bytes in the Pentium 4 and other recent Intel processors, including
17485 processors based on Intel Core microarchitecture) should be aligned
17486 so that its base address is a multiple of a cache line size. */
17488 unsigned int max_align
17489 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
17491 if (max_align < BITS_PER_WORD)
17492 max_align = BITS_PER_WORD;
17494 switch (ix86_align_data_type)
17496 case ix86_align_data_type_abi: opt = false; break;
17497 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
17498 case ix86_align_data_type_cacheline: break;
17501 if (TARGET_IAMCU)
17502 align = iamcu_alignment (type, align);
17504 if (opt
17505 && AGGREGATE_TYPE_P (type)
17506 && TYPE_SIZE (type)
17507 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
17509 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
17510 && align < max_align_compat)
17511 align = max_align_compat;
17512 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
17513 && align < max_align)
17514 align = max_align;
17517 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
17518 to 16byte boundary. */
17519 if (TARGET_64BIT)
17521 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
17522 && TYPE_SIZE (type)
17523 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17524 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
17525 && align < 128)
17526 return 128;
17529 if (!opt)
17530 return align;
17532 if (TREE_CODE (type) == ARRAY_TYPE)
17534 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17535 return 64;
17536 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17537 return 128;
17539 else if (TREE_CODE (type) == COMPLEX_TYPE)
17542 if (TYPE_MODE (type) == DCmode && align < 64)
17543 return 64;
17544 if ((TYPE_MODE (type) == XCmode
17545 || TYPE_MODE (type) == TCmode) && align < 128)
17546 return 128;
17548 else if (RECORD_OR_UNION_TYPE_P (type)
17549 && TYPE_FIELDS (type))
17551 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17552 return 64;
17553 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17554 return 128;
17556 else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
17557 || TREE_CODE (type) == INTEGER_TYPE)
17559 if (TYPE_MODE (type) == DFmode && align < 64)
17560 return 64;
17561 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17562 return 128;
17565 return align;
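/* Example (illustrative): when optimizing, a file-scope "double buf[64]"
   (512 bytes) is an aggregate at least as large as max_align, so its
   alignment is raised to the cache-line-derived value (typically 512
   bits); an 8-byte scalar keeps its natural 64-bit alignment.  */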
 17568 /* Implement TARGET_LOWER_LOCAL_DECL_ALIGNMENT.  */
17569 static void
17570 ix86_lower_local_decl_alignment (tree decl)
17572 unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
17573 DECL_ALIGN (decl), true);
17574 if (new_align < DECL_ALIGN (decl))
17575 SET_DECL_ALIGN (decl, new_align);
17578 /* Compute the alignment for a local variable or a stack slot. EXP is
17579 the data type or decl itself, MODE is the widest mode available and
17580 ALIGN is the alignment that the object would ordinarily have. The
17581 value of this macro is used instead of that alignment to align the
17582 object. */
17584 unsigned int
17585 ix86_local_alignment (tree exp, machine_mode mode,
17586 unsigned int align, bool may_lower)
17588 tree type, decl;
17590 if (exp && DECL_P (exp))
17592 type = TREE_TYPE (exp);
17593 decl = exp;
17595 else
17597 type = exp;
17598 decl = NULL;
17601 /* Don't do dynamic stack realignment for long long objects with
17602 -mpreferred-stack-boundary=2. */
17603 if (may_lower
17604 && !TARGET_64BIT
17605 && align == 64
17606 && ix86_preferred_stack_boundary < 64
17607 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
17608 && (!type || (!TYPE_USER_ALIGN (type)
17609 && !TYPE_ATOMIC (strip_array_types (type))))
17610 && (!decl || !DECL_USER_ALIGN (decl)))
17611 align = 32;
17613 /* If TYPE is NULL, we are allocating a stack slot for caller-save
17614 register in MODE. We will return the largest alignment of XF
17615 and DF. */
17616 if (!type)
17618 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
17619 align = GET_MODE_ALIGNMENT (DFmode);
17620 return align;
17623 /* Don't increase alignment for Intel MCU psABI. */
17624 if (TARGET_IAMCU)
17625 return align;
17627 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
17628 to 16byte boundary. Exact wording is:
17630 An array uses the same alignment as its elements, except that a local or
17631 global array variable of length at least 16 bytes or
17632 a C99 variable-length array variable always has alignment of at least 16 bytes.
 17634    This was added to allow use of aligned SSE instructions on arrays.  This
17635 rule is meant for static storage (where compiler cannot do the analysis
17636 by itself). We follow it for automatic variables only when convenient.
17637 We fully control everything in the function compiled and functions from
17638 other unit cannot rely on the alignment.
17640 Exclude va_list type. It is the common case of local array where
17641 we cannot benefit from the alignment.
17643 TODO: Probably one should optimize for size only when var is not escaping. */
17644 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
17645 && TARGET_SSE)
17647 if (AGGREGATE_TYPE_P (type)
17648 && (va_list_type_node == NULL_TREE
17649 || (TYPE_MAIN_VARIANT (type)
17650 != TYPE_MAIN_VARIANT (va_list_type_node)))
17651 && TYPE_SIZE (type)
17652 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17653 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
17654 && align < 128)
17655 return 128;
17657 if (TREE_CODE (type) == ARRAY_TYPE)
17659 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17660 return 64;
17661 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17662 return 128;
17664 else if (TREE_CODE (type) == COMPLEX_TYPE)
17666 if (TYPE_MODE (type) == DCmode && align < 64)
17667 return 64;
17668 if ((TYPE_MODE (type) == XCmode
17669 || TYPE_MODE (type) == TCmode) && align < 128)
17670 return 128;
17672 else if (RECORD_OR_UNION_TYPE_P (type)
17673 && TYPE_FIELDS (type))
17675 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17676 return 64;
17677 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17678 return 128;
17680 else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
17681 || TREE_CODE (type) == INTEGER_TYPE)
17684 if (TYPE_MODE (type) == DFmode && align < 64)
17685 return 64;
17686 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17687 return 128;
17689 return align;
17692 /* Compute the minimum required alignment for dynamic stack realignment
17693 purposes for a local variable, parameter or a stack slot. EXP is
17694 the data type or decl itself, MODE is its mode and ALIGN is the
17695 alignment that the object would ordinarily have. */
17697 unsigned int
17698 ix86_minimum_alignment (tree exp, machine_mode mode,
17699 unsigned int align)
17701 tree type, decl;
17703 if (exp && DECL_P (exp))
17705 type = TREE_TYPE (exp);
17706 decl = exp;
17708 else
17710 type = exp;
17711 decl = NULL;
17714 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
17715 return align;
17717 /* Don't do dynamic stack realignment for long long objects with
17718 -mpreferred-stack-boundary=2. */
17719 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
17720 && (!type || (!TYPE_USER_ALIGN (type)
17721 && !TYPE_ATOMIC (strip_array_types (type))))
17722 && (!decl || !DECL_USER_ALIGN (decl)))
17724 gcc_checking_assert (!TARGET_STV);
17725 return 32;
17728 return align;
17731 /* Find a location for the static chain incoming to a nested function.
17732 This is a register, unless all free registers are used by arguments. */
17734 static rtx
17735 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
17737 unsigned regno;
17739 if (TARGET_64BIT)
17741 /* We always use R10 in 64-bit mode. */
17742 regno = R10_REG;
17744 else
17746 const_tree fntype, fndecl;
17747 unsigned int ccvt;
17749 /* By default in 32-bit mode we use ECX to pass the static chain. */
17750 regno = CX_REG;
17752 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
17754 fntype = TREE_TYPE (fndecl_or_type);
17755 fndecl = fndecl_or_type;
17757 else
17759 fntype = fndecl_or_type;
17760 fndecl = NULL;
17763 ccvt = ix86_get_callcvt (fntype);
17764 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
17766 /* Fastcall functions use ecx/edx for arguments, which leaves
17767 us with EAX for the static chain.
17768 Thiscall functions use ecx for arguments, which also
17769 leaves us with EAX for the static chain. */
17770 regno = AX_REG;
17772 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
17774 /* Thiscall functions use ecx for arguments, which leaves
17775 us with EAX and EDX for the static chain.
17776 We are using for abi-compatibility EAX. */
17777 regno = AX_REG;
17779 else if (ix86_function_regparm (fntype, fndecl) == 3)
17781 /* For regparm 3, we have no free call-clobbered registers in
17782 which to store the static chain. In order to implement this,
17783 we have the trampoline push the static chain to the stack.
17784 However, we can't push a value below the return address when
17785 we call the nested function directly, so we have to use an
17786 alternate entry point. For this we use ESI, and have the
17787 alternate entry point push ESI, so that things appear the
17788 same once we're executing the nested function. */
17789 if (incoming_p)
17791 if (fndecl == current_function_decl
17792 && !ix86_static_chain_on_stack)
17794 gcc_assert (!reload_completed);
17795 ix86_static_chain_on_stack = true;
17797 return gen_frame_mem (SImode,
17798 plus_constant (Pmode,
17799 arg_pointer_rtx, -8));
17801 regno = SI_REG;
17805 return gen_rtx_REG (Pmode, regno);
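/* Summary (illustrative): 64-bit code always uses %r10; 32-bit code
   normally uses %ecx, fastcall/thiscall fall back to %eax, and regparm(3)
   functions receive the chain on the stack, with %esi used at the
   alternate entry point described above.  */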
17808 /* Emit RTL insns to initialize the variable parts of a trampoline.
17809 FNDECL is the decl of the target address; M_TRAMP is a MEM for
17810 the trampoline, and CHAIN_VALUE is an RTX for the static chain
17811 to be passed to the target function. */
17813 static void
17814 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
17816 rtx mem, fnaddr;
17817 int opcode;
17818 int offset = 0;
17819 bool need_endbr = (flag_cf_protection & CF_BRANCH);
17821 fnaddr = XEXP (DECL_RTL (fndecl), 0);
17823 if (TARGET_64BIT)
17825 int size;
17827 if (need_endbr)
17829 /* Insert ENDBR64. */
17830 mem = adjust_address (m_tramp, SImode, offset);
17831 emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
17832 offset += 4;
17835 /* Load the function address to r11. Try to load address using
17836 the shorter movl instead of movabs. We may want to support
17837 movq for kernel mode, but kernel does not use trampolines at
17838 the moment. FNADDR is a 32bit address and may not be in
17839 DImode when ptr_mode == SImode. Always use movl in this
17840 case. */
17841 if (ptr_mode == SImode
17842 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
17844 fnaddr = copy_addr_to_reg (fnaddr);
17846 mem = adjust_address (m_tramp, HImode, offset);
17847 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
17849 mem = adjust_address (m_tramp, SImode, offset + 2);
17850 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
17851 offset += 6;
17853 else
17855 mem = adjust_address (m_tramp, HImode, offset);
17856 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
17858 mem = adjust_address (m_tramp, DImode, offset + 2);
17859 emit_move_insn (mem, fnaddr);
17860 offset += 10;
17863 /* Load static chain using movabs to r10. Use the shorter movl
17864 instead of movabs when ptr_mode == SImode. */
17865 if (ptr_mode == SImode)
17867 opcode = 0xba41;
17868 size = 6;
17870 else
17872 opcode = 0xba49;
17873 size = 10;
17876 mem = adjust_address (m_tramp, HImode, offset);
17877 emit_move_insn (mem, gen_int_mode (opcode, HImode));
17879 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
17880 emit_move_insn (mem, chain_value);
17881 offset += size;
17883 /* Jump to r11; the last (unused) byte is a nop, only there to
17884 pad the write out to a single 32-bit store. */
17885 mem = adjust_address (m_tramp, SImode, offset);
17886 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
17887 offset += 4;
17889 else
17891 rtx disp, chain;
17893 /* Depending on the static chain location, either load a register
17894 with a constant, or push the constant to the stack. All of the
17895 instructions are the same size. */
17896 chain = ix86_static_chain (fndecl, true);
17897 if (REG_P (chain))
17899 switch (REGNO (chain))
17901 case AX_REG:
17902 opcode = 0xb8; break;
17903 case CX_REG:
17904 opcode = 0xb9; break;
17905 default:
17906 gcc_unreachable ();
17909 else
17910 opcode = 0x68;
17912 if (need_endbr)
17914 /* Insert ENDBR32. */
17915 mem = adjust_address (m_tramp, SImode, offset);
17916 emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
17917 offset += 4;
17920 mem = adjust_address (m_tramp, QImode, offset);
17921 emit_move_insn (mem, gen_int_mode (opcode, QImode));
17923 mem = adjust_address (m_tramp, SImode, offset + 1);
17924 emit_move_insn (mem, chain_value);
17925 offset += 5;
17927 mem = adjust_address (m_tramp, QImode, offset);
17928 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
17930 mem = adjust_address (m_tramp, SImode, offset + 1);
17932 /* Compute offset from the end of the jmp to the target function.
17933 In the case in which the trampoline stores the static chain on
17934 the stack, we need to skip the first insn of the target, which pushes
17935 the (call-saved) static chain register; this push is 1 byte. */
17936 offset += 5;
17937 int skip = MEM_P (chain) ? 1 : 0;
17938 /* Skip ENDBR32 at the entry of the target function. */
17939 if (need_endbr
17940 && !cgraph_node::get (fndecl)->only_called_directly_p ())
17941 skip += 4;
17942 disp = expand_binop (SImode, sub_optab, fnaddr,
17943 plus_constant (Pmode, XEXP (m_tramp, 0),
17944 offset - skip),
17945 NULL_RTX, 1, OPTAB_DIRECT);
17946 emit_move_insn (mem, disp);
17949 gcc_assert (offset <= TRAMPOLINE_SIZE);
17951 #ifdef HAVE_ENABLE_EXECUTE_STACK
17952 #ifdef CHECK_EXECUTE_STACK_ENABLED
17953 if (CHECK_EXECUTE_STACK_ENABLED)
17954 #endif
17955 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
17956 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
17957 #endif
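/* For illustration, a sketch of the bytes the 64-bit path above emits in
   the common movabs case with ENDBR (little-endian stores of the constants
   shown in the code):

     offset  0: f3 0f 1e fa             endbr64
     offset  4: 49 bb <8-byte fnaddr>   movabs $fnaddr, %r11
     offset 14: 49 ba <8-byte chain>    movabs $chain, %r10
     offset 24: 49 ff e3 90             jmp *%r11; nop (pads the final
                                        32-bit store)

   for 28 bytes in total, consistent with the gcc_assert against
   TRAMPOLINE_SIZE above.  */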
17960 static bool
17961 ix86_allocate_stack_slots_for_args (void)
17963 /* Naked functions should not allocate stack slots for arguments. */
17964 return !ix86_function_naked (current_function_decl);
17967 static bool
17968 ix86_warn_func_return (tree decl)
17970 /* Naked functions are implemented entirely in assembly, including the
17971 return sequence, so suppress warnings about this. */
17972 return !ix86_function_naked (decl);
17975 /* Return the shift count of a vector-by-scalar shift builtin given its
17976 second argument ARG1. */
17977 static tree
17978 ix86_vector_shift_count (tree arg1)
17980 if (tree_fits_uhwi_p (arg1))
17981 return arg1;
17982 else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
17984 /* The count argument is odd: it is passed in as various 128-bit
17985 (or 64-bit) vectors, and its low 64 bits are the count. */
17986 unsigned char buf[16];
17987 int len = native_encode_expr (arg1, buf, 16);
17988 if (len == 0)
17989 return NULL_TREE;
17990 tree t = native_interpret_expr (uint64_type_node, buf, len);
17991 if (t && tree_fits_uhwi_p (t))
17992 return t;
17994 return NULL_TREE;
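/* For illustration: the count is taken from the low 64 bits of the vector,
   so on a little-endian target a V2DImode constant { 3, 0 } yields a shift
   count of 3.  */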
17997 /* Return true if ARG_MASK is all ones; ELEMS is the number of elements
17998 of the corresponding vector. */
17999 static bool
18000 ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask)
18002 if (TREE_CODE (arg_mask) != INTEGER_CST)
18003 return false;
18005 unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask);
18006 if (elems == HOST_BITS_PER_WIDE_INT)
18007 return mask == HOST_WIDE_INT_M1U;
18008 if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
18009 return false;
18011 return true;
18014 static tree
18015 ix86_fold_builtin (tree fndecl, int n_args,
18016 tree *args, bool ignore ATTRIBUTE_UNUSED)
18018 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
18020 enum ix86_builtins fn_code
18021 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
18022 enum rtx_code rcode;
18023 bool is_vshift;
18024 unsigned HOST_WIDE_INT mask;
18026 switch (fn_code)
18028 case IX86_BUILTIN_CPU_IS:
18029 case IX86_BUILTIN_CPU_SUPPORTS:
18030 gcc_assert (n_args == 1);
18031 return fold_builtin_cpu (fndecl, args);
18033 case IX86_BUILTIN_NANQ:
18034 case IX86_BUILTIN_NANSQ:
18036 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18037 const char *str = c_getstr (*args);
18038 int quiet = fn_code == IX86_BUILTIN_NANQ;
18039 REAL_VALUE_TYPE real;
18041 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
18042 return build_real (type, real);
18043 return NULL_TREE;
18046 case IX86_BUILTIN_INFQ:
18047 case IX86_BUILTIN_HUGE_VALQ:
18049 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18050 REAL_VALUE_TYPE inf;
18051 real_inf (&inf);
18052 return build_real (type, inf);
18055 case IX86_BUILTIN_TZCNT16:
18056 case IX86_BUILTIN_CTZS:
18057 case IX86_BUILTIN_TZCNT32:
18058 case IX86_BUILTIN_TZCNT64:
18059 gcc_assert (n_args == 1);
18060 if (TREE_CODE (args[0]) == INTEGER_CST)
18062 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18063 tree arg = args[0];
18064 if (fn_code == IX86_BUILTIN_TZCNT16
18065 || fn_code == IX86_BUILTIN_CTZS)
18066 arg = fold_convert (short_unsigned_type_node, arg);
18067 if (integer_zerop (arg))
18068 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
18069 else
18070 return fold_const_call (CFN_CTZ, type, arg);
18072 break;
18074 case IX86_BUILTIN_LZCNT16:
18075 case IX86_BUILTIN_CLZS:
18076 case IX86_BUILTIN_LZCNT32:
18077 case IX86_BUILTIN_LZCNT64:
18078 gcc_assert (n_args == 1);
18079 if (TREE_CODE (args[0]) == INTEGER_CST)
18081 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18082 tree arg = args[0];
18083 if (fn_code == IX86_BUILTIN_LZCNT16
18084 || fn_code == IX86_BUILTIN_CLZS)
18085 arg = fold_convert (short_unsigned_type_node, arg);
18086 if (integer_zerop (arg))
18087 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
18088 else
18089 return fold_const_call (CFN_CLZ, type, arg);
18091 break;
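/* Worked examples of the tzcnt/lzcnt folding above:
   __builtin_ia32_tzcnt_u32 (8) folds to 3, and for a zero argument both
   tzcnt and lzcnt fold to the operand precision, e.g.
   __builtin_ia32_tzcnt_u32 (0) folds to 32.  */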
18093 case IX86_BUILTIN_BEXTR32:
18094 case IX86_BUILTIN_BEXTR64:
18095 case IX86_BUILTIN_BEXTRI32:
18096 case IX86_BUILTIN_BEXTRI64:
18097 gcc_assert (n_args == 2);
18098 if (tree_fits_uhwi_p (args[1]))
18100 unsigned HOST_WIDE_INT res = 0;
18101 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
18102 unsigned int start = tree_to_uhwi (args[1]);
18103 unsigned int len = (start & 0xff00) >> 8;
18104 start &= 0xff;
18105 if (start >= prec || len == 0)
18106 res = 0;
18107 else if (!tree_fits_uhwi_p (args[0]))
18108 break;
18109 else
18110 res = tree_to_uhwi (args[0]) >> start;
18111 if (len > prec)
18112 len = prec;
18113 if (len < HOST_BITS_PER_WIDE_INT)
18114 res &= (HOST_WIDE_INT_1U << len) - 1;
18115 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18117 break;
18119 case IX86_BUILTIN_BZHI32:
18120 case IX86_BUILTIN_BZHI64:
18121 gcc_assert (n_args == 2);
18122 if (tree_fits_uhwi_p (args[1]))
18124 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
18125 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
18126 return args[0];
18127 if (idx == 0)
18128 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0);
18129 if (!tree_fits_uhwi_p (args[0]))
18130 break;
18131 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
18132 res &= ~(HOST_WIDE_INT_M1U << idx);
18133 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18135 break;
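/* For illustration, the BZHI folding above: __builtin_ia32_bzhi_si (x, n)
   zeroes the bits of x from position (n & 0xff) upwards, so
   __builtin_ia32_bzhi_si (0xff, 4) folds to 0x0f, and any index of 32 or
   more returns x unchanged.  */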
18137 case IX86_BUILTIN_PDEP32:
18138 case IX86_BUILTIN_PDEP64:
18139 gcc_assert (n_args == 2);
18140 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
18142 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
18143 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
18144 unsigned HOST_WIDE_INT res = 0;
18145 unsigned HOST_WIDE_INT m, k = 1;
18146 for (m = 1; m; m <<= 1)
18147 if ((mask & m) != 0)
18149 if ((src & k) != 0)
18150 res |= m;
18151 k <<= 1;
18153 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18155 break;
18157 case IX86_BUILTIN_PEXT32:
18158 case IX86_BUILTIN_PEXT64:
18159 gcc_assert (n_args == 2);
18160 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
18162 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
18163 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
18164 unsigned HOST_WIDE_INT res = 0;
18165 unsigned HOST_WIDE_INT m, k = 1;
18166 for (m = 1; m; m <<= 1)
18167 if ((mask & m) != 0)
18169 if ((src & m) != 0)
18170 res |= k;
18171 k <<= 1;
18173 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18175 break;
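/* For illustration, the PDEP/PEXT folding above models the hardware
   semantics: pdep deposits the low bits of the source into the 1-bit
   positions of the mask, and pext is the inverse, e.g.
     __builtin_ia32_pdep_si (0b101, 0b11010) folds to 0b10010, and
     __builtin_ia32_pext_si (0b10010, 0b11010) folds to 0b101.  */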
18177 case IX86_BUILTIN_MOVMSKPS:
18178 case IX86_BUILTIN_PMOVMSKB:
18179 case IX86_BUILTIN_MOVMSKPD:
18180 case IX86_BUILTIN_PMOVMSKB128:
18181 case IX86_BUILTIN_MOVMSKPD256:
18182 case IX86_BUILTIN_MOVMSKPS256:
18183 case IX86_BUILTIN_PMOVMSKB256:
18184 gcc_assert (n_args == 1);
18185 if (TREE_CODE (args[0]) == VECTOR_CST)
18187 HOST_WIDE_INT res = 0;
18188 for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
18190 tree e = VECTOR_CST_ELT (args[0], i);
18191 if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
18193 if (wi::neg_p (wi::to_wide (e)))
18194 res |= HOST_WIDE_INT_1 << i;
18196 else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
18198 if (TREE_REAL_CST (e).sign)
18199 res |= HOST_WIDE_INT_1 << i;
18201 else
18202 return NULL_TREE;
18204 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
18206 break;
18208 case IX86_BUILTIN_PSLLD:
18209 case IX86_BUILTIN_PSLLD128:
18210 case IX86_BUILTIN_PSLLD128_MASK:
18211 case IX86_BUILTIN_PSLLD256:
18212 case IX86_BUILTIN_PSLLD256_MASK:
18213 case IX86_BUILTIN_PSLLD512:
18214 case IX86_BUILTIN_PSLLDI:
18215 case IX86_BUILTIN_PSLLDI128:
18216 case IX86_BUILTIN_PSLLDI128_MASK:
18217 case IX86_BUILTIN_PSLLDI256:
18218 case IX86_BUILTIN_PSLLDI256_MASK:
18219 case IX86_BUILTIN_PSLLDI512:
18220 case IX86_BUILTIN_PSLLQ:
18221 case IX86_BUILTIN_PSLLQ128:
18222 case IX86_BUILTIN_PSLLQ128_MASK:
18223 case IX86_BUILTIN_PSLLQ256:
18224 case IX86_BUILTIN_PSLLQ256_MASK:
18225 case IX86_BUILTIN_PSLLQ512:
18226 case IX86_BUILTIN_PSLLQI:
18227 case IX86_BUILTIN_PSLLQI128:
18228 case IX86_BUILTIN_PSLLQI128_MASK:
18229 case IX86_BUILTIN_PSLLQI256:
18230 case IX86_BUILTIN_PSLLQI256_MASK:
18231 case IX86_BUILTIN_PSLLQI512:
18232 case IX86_BUILTIN_PSLLW:
18233 case IX86_BUILTIN_PSLLW128:
18234 case IX86_BUILTIN_PSLLW128_MASK:
18235 case IX86_BUILTIN_PSLLW256:
18236 case IX86_BUILTIN_PSLLW256_MASK:
18237 case IX86_BUILTIN_PSLLW512_MASK:
18238 case IX86_BUILTIN_PSLLWI:
18239 case IX86_BUILTIN_PSLLWI128:
18240 case IX86_BUILTIN_PSLLWI128_MASK:
18241 case IX86_BUILTIN_PSLLWI256:
18242 case IX86_BUILTIN_PSLLWI256_MASK:
18243 case IX86_BUILTIN_PSLLWI512_MASK:
18244 rcode = ASHIFT;
18245 is_vshift = false;
18246 goto do_shift;
18247 case IX86_BUILTIN_PSRAD:
18248 case IX86_BUILTIN_PSRAD128:
18249 case IX86_BUILTIN_PSRAD128_MASK:
18250 case IX86_BUILTIN_PSRAD256:
18251 case IX86_BUILTIN_PSRAD256_MASK:
18252 case IX86_BUILTIN_PSRAD512:
18253 case IX86_BUILTIN_PSRADI:
18254 case IX86_BUILTIN_PSRADI128:
18255 case IX86_BUILTIN_PSRADI128_MASK:
18256 case IX86_BUILTIN_PSRADI256:
18257 case IX86_BUILTIN_PSRADI256_MASK:
18258 case IX86_BUILTIN_PSRADI512:
18259 case IX86_BUILTIN_PSRAQ128_MASK:
18260 case IX86_BUILTIN_PSRAQ256_MASK:
18261 case IX86_BUILTIN_PSRAQ512:
18262 case IX86_BUILTIN_PSRAQI128_MASK:
18263 case IX86_BUILTIN_PSRAQI256_MASK:
18264 case IX86_BUILTIN_PSRAQI512:
18265 case IX86_BUILTIN_PSRAW:
18266 case IX86_BUILTIN_PSRAW128:
18267 case IX86_BUILTIN_PSRAW128_MASK:
18268 case IX86_BUILTIN_PSRAW256:
18269 case IX86_BUILTIN_PSRAW256_MASK:
18270 case IX86_BUILTIN_PSRAW512:
18271 case IX86_BUILTIN_PSRAWI:
18272 case IX86_BUILTIN_PSRAWI128:
18273 case IX86_BUILTIN_PSRAWI128_MASK:
18274 case IX86_BUILTIN_PSRAWI256:
18275 case IX86_BUILTIN_PSRAWI256_MASK:
18276 case IX86_BUILTIN_PSRAWI512:
18277 rcode = ASHIFTRT;
18278 is_vshift = false;
18279 goto do_shift;
18280 case IX86_BUILTIN_PSRLD:
18281 case IX86_BUILTIN_PSRLD128:
18282 case IX86_BUILTIN_PSRLD128_MASK:
18283 case IX86_BUILTIN_PSRLD256:
18284 case IX86_BUILTIN_PSRLD256_MASK:
18285 case IX86_BUILTIN_PSRLD512:
18286 case IX86_BUILTIN_PSRLDI:
18287 case IX86_BUILTIN_PSRLDI128:
18288 case IX86_BUILTIN_PSRLDI128_MASK:
18289 case IX86_BUILTIN_PSRLDI256:
18290 case IX86_BUILTIN_PSRLDI256_MASK:
18291 case IX86_BUILTIN_PSRLDI512:
18292 case IX86_BUILTIN_PSRLQ:
18293 case IX86_BUILTIN_PSRLQ128:
18294 case IX86_BUILTIN_PSRLQ128_MASK:
18295 case IX86_BUILTIN_PSRLQ256:
18296 case IX86_BUILTIN_PSRLQ256_MASK:
18297 case IX86_BUILTIN_PSRLQ512:
18298 case IX86_BUILTIN_PSRLQI:
18299 case IX86_BUILTIN_PSRLQI128:
18300 case IX86_BUILTIN_PSRLQI128_MASK:
18301 case IX86_BUILTIN_PSRLQI256:
18302 case IX86_BUILTIN_PSRLQI256_MASK:
18303 case IX86_BUILTIN_PSRLQI512:
18304 case IX86_BUILTIN_PSRLW:
18305 case IX86_BUILTIN_PSRLW128:
18306 case IX86_BUILTIN_PSRLW128_MASK:
18307 case IX86_BUILTIN_PSRLW256:
18308 case IX86_BUILTIN_PSRLW256_MASK:
18309 case IX86_BUILTIN_PSRLW512:
18310 case IX86_BUILTIN_PSRLWI:
18311 case IX86_BUILTIN_PSRLWI128:
18312 case IX86_BUILTIN_PSRLWI128_MASK:
18313 case IX86_BUILTIN_PSRLWI256:
18314 case IX86_BUILTIN_PSRLWI256_MASK:
18315 case IX86_BUILTIN_PSRLWI512:
18316 rcode = LSHIFTRT;
18317 is_vshift = false;
18318 goto do_shift;
18319 case IX86_BUILTIN_PSLLVV16HI:
18320 case IX86_BUILTIN_PSLLVV16SI:
18321 case IX86_BUILTIN_PSLLVV2DI:
18322 case IX86_BUILTIN_PSLLVV2DI_MASK:
18323 case IX86_BUILTIN_PSLLVV32HI:
18324 case IX86_BUILTIN_PSLLVV4DI:
18325 case IX86_BUILTIN_PSLLVV4DI_MASK:
18326 case IX86_BUILTIN_PSLLVV4SI:
18327 case IX86_BUILTIN_PSLLVV4SI_MASK:
18328 case IX86_BUILTIN_PSLLVV8DI:
18329 case IX86_BUILTIN_PSLLVV8HI:
18330 case IX86_BUILTIN_PSLLVV8SI:
18331 case IX86_BUILTIN_PSLLVV8SI_MASK:
18332 rcode = ASHIFT;
18333 is_vshift = true;
18334 goto do_shift;
18335 case IX86_BUILTIN_PSRAVQ128:
18336 case IX86_BUILTIN_PSRAVQ256:
18337 case IX86_BUILTIN_PSRAVV16HI:
18338 case IX86_BUILTIN_PSRAVV16SI:
18339 case IX86_BUILTIN_PSRAVV32HI:
18340 case IX86_BUILTIN_PSRAVV4SI:
18341 case IX86_BUILTIN_PSRAVV4SI_MASK:
18342 case IX86_BUILTIN_PSRAVV8DI:
18343 case IX86_BUILTIN_PSRAVV8HI:
18344 case IX86_BUILTIN_PSRAVV8SI:
18345 case IX86_BUILTIN_PSRAVV8SI_MASK:
18346 rcode = ASHIFTRT;
18347 is_vshift = true;
18348 goto do_shift;
18349 case IX86_BUILTIN_PSRLVV16HI:
18350 case IX86_BUILTIN_PSRLVV16SI:
18351 case IX86_BUILTIN_PSRLVV2DI:
18352 case IX86_BUILTIN_PSRLVV2DI_MASK:
18353 case IX86_BUILTIN_PSRLVV32HI:
18354 case IX86_BUILTIN_PSRLVV4DI:
18355 case IX86_BUILTIN_PSRLVV4DI_MASK:
18356 case IX86_BUILTIN_PSRLVV4SI:
18357 case IX86_BUILTIN_PSRLVV4SI_MASK:
18358 case IX86_BUILTIN_PSRLVV8DI:
18359 case IX86_BUILTIN_PSRLVV8HI:
18360 case IX86_BUILTIN_PSRLVV8SI:
18361 case IX86_BUILTIN_PSRLVV8SI_MASK:
18362 rcode = LSHIFTRT;
18363 is_vshift = true;
18364 goto do_shift;
18366 do_shift:
18367 gcc_assert (n_args >= 2);
18368 if (TREE_CODE (args[0]) != VECTOR_CST)
18369 break;
18370 mask = HOST_WIDE_INT_M1U;
18371 if (n_args > 2)
18373 /* This is a masked shift. */
18374 if (!tree_fits_uhwi_p (args[n_args - 1])
18375 || TREE_SIDE_EFFECTS (args[n_args - 2]))
18376 break;
18377 mask = tree_to_uhwi (args[n_args - 1]);
18378 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
18379 mask |= HOST_WIDE_INT_M1U << elems;
18380 if (mask != HOST_WIDE_INT_M1U
18381 && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
18382 break;
18383 if (mask == (HOST_WIDE_INT_M1U << elems))
18384 return args[n_args - 2];
18386 if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
18387 break;
18388 if (tree tem = (is_vshift ? integer_one_node
18389 : ix86_vector_shift_count (args[1])))
18391 unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
18392 unsigned HOST_WIDE_INT prec
18393 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
18394 if (count == 0 && mask == HOST_WIDE_INT_M1U)
18395 return args[0];
18396 if (count >= prec)
18398 if (rcode == ASHIFTRT)
18399 count = prec - 1;
18400 else if (mask == HOST_WIDE_INT_M1U)
18401 return build_zero_cst (TREE_TYPE (args[0]));
18403 tree countt = NULL_TREE;
18404 if (!is_vshift)
18406 if (count >= prec)
18407 countt = integer_zero_node;
18408 else
18409 countt = build_int_cst (integer_type_node, count);
18411 tree_vector_builder builder;
18412 if (mask != HOST_WIDE_INT_M1U || is_vshift)
18413 builder.new_vector (TREE_TYPE (args[0]),
18414 TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
18416 else
18417 builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
18418 false);
18419 unsigned int cnt = builder.encoded_nelts ();
18420 for (unsigned int i = 0; i < cnt; ++i)
18422 tree elt = VECTOR_CST_ELT (args[0], i);
18423 if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
18424 return NULL_TREE;
18425 tree type = TREE_TYPE (elt);
18426 if (rcode == LSHIFTRT)
18427 elt = fold_convert (unsigned_type_for (type), elt);
18428 if (is_vshift)
18430 countt = VECTOR_CST_ELT (args[1], i);
18431 if (TREE_CODE (countt) != INTEGER_CST
18432 || TREE_OVERFLOW (countt))
18433 return NULL_TREE;
18434 if (wi::neg_p (wi::to_wide (countt))
18435 || wi::to_widest (countt) >= prec)
18437 if (rcode == ASHIFTRT)
18438 countt = build_int_cst (TREE_TYPE (countt),
18439 prec - 1);
18440 else
18442 elt = build_zero_cst (TREE_TYPE (elt));
18443 countt = build_zero_cst (TREE_TYPE (countt));
18447 else if (count >= prec)
18448 elt = build_zero_cst (TREE_TYPE (elt));
18449 elt = const_binop (rcode == ASHIFT
18450 ? LSHIFT_EXPR : RSHIFT_EXPR,
18451 TREE_TYPE (elt), elt, countt);
18452 if (!elt || TREE_CODE (elt) != INTEGER_CST)
18453 return NULL_TREE;
18454 if (rcode == LSHIFTRT)
18455 elt = fold_convert (type, elt);
18456 if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
18458 elt = VECTOR_CST_ELT (args[n_args - 2], i);
18459 if (TREE_CODE (elt) != INTEGER_CST
18460 || TREE_OVERFLOW (elt))
18461 return NULL_TREE;
18463 builder.quick_push (elt);
18465 return builder.build ();
18467 break;
18469 default:
18470 break;
18474 #ifdef SUBTARGET_FOLD_BUILTIN
18475 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
18476 #endif
18478 return NULL_TREE;
18481 /* Fold a MD builtin (use ix86_fold_builtin for folding into
18482 constant) in GIMPLE. */
18484 bool
18485 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
18487 gimple *stmt = gsi_stmt (*gsi), *g;
18488 gimple_seq stmts = NULL;
18489 tree fndecl = gimple_call_fndecl (stmt);
18490 gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
18491 int n_args = gimple_call_num_args (stmt);
18492 enum ix86_builtins fn_code
18493 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
18494 tree decl = NULL_TREE;
18495 tree arg0, arg1, arg2;
18496 enum rtx_code rcode;
18497 enum tree_code tcode;
18498 unsigned HOST_WIDE_INT count;
18499 bool is_vshift;
18500 unsigned HOST_WIDE_INT elems;
18501 location_t loc;
18503 /* Don't fold when there is an ISA mismatch. */
18504 if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL))
18505 return false;
18507 switch (fn_code)
18509 case IX86_BUILTIN_TZCNT32:
18510 decl = builtin_decl_implicit (BUILT_IN_CTZ);
18511 goto fold_tzcnt_lzcnt;
18513 case IX86_BUILTIN_TZCNT64:
18514 decl = builtin_decl_implicit (BUILT_IN_CTZLL);
18515 goto fold_tzcnt_lzcnt;
18517 case IX86_BUILTIN_LZCNT32:
18518 decl = builtin_decl_implicit (BUILT_IN_CLZ);
18519 goto fold_tzcnt_lzcnt;
18521 case IX86_BUILTIN_LZCNT64:
18522 decl = builtin_decl_implicit (BUILT_IN_CLZLL);
18523 goto fold_tzcnt_lzcnt;
18525 fold_tzcnt_lzcnt:
18526 gcc_assert (n_args == 1);
18527 arg0 = gimple_call_arg (stmt, 0);
18528 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
18530 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
18531 /* If arg0 is provably non-zero, optimize into the generic
18532 __builtin_c[tl]z{,ll} function, which the middle-end handles
18533 better. */
18534 if (!expr_not_equal_to (arg0, wi::zero (prec)))
18535 return false;
18537 loc = gimple_location (stmt);
18538 g = gimple_build_call (decl, 1, arg0);
18539 gimple_set_location (g, loc);
18540 tree lhs = make_ssa_name (integer_type_node);
18541 gimple_call_set_lhs (g, lhs);
18542 gsi_insert_before (gsi, g, GSI_SAME_STMT);
18543 g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
18544 gimple_set_location (g, loc);
18545 gsi_replace (gsi, g, false);
18546 return true;
18548 break;
18550 case IX86_BUILTIN_BZHI32:
18551 case IX86_BUILTIN_BZHI64:
18552 gcc_assert (n_args == 2);
18553 arg1 = gimple_call_arg (stmt, 1);
18554 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
18556 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
18557 arg0 = gimple_call_arg (stmt, 0);
18558 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
18559 break;
18560 loc = gimple_location (stmt);
18561 g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
18562 gimple_set_location (g, loc);
18563 gsi_replace (gsi, g, false);
18564 return true;
18566 break;
18568 case IX86_BUILTIN_PDEP32:
18569 case IX86_BUILTIN_PDEP64:
18570 case IX86_BUILTIN_PEXT32:
18571 case IX86_BUILTIN_PEXT64:
18572 gcc_assert (n_args == 2);
18573 arg1 = gimple_call_arg (stmt, 1);
18574 if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
18576 loc = gimple_location (stmt);
18577 arg0 = gimple_call_arg (stmt, 0);
18578 g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
18579 gimple_set_location (g, loc);
18580 gsi_replace (gsi, g, false);
18581 return true;
18583 break;
18585 case IX86_BUILTIN_PBLENDVB256:
18586 case IX86_BUILTIN_BLENDVPS256:
18587 case IX86_BUILTIN_BLENDVPD256:
18588 /* pcmpeqb/d/q is under avx2; w/o avx2 it is veclowered
18589 to scalar operations and not combined back. */
18590 if (!TARGET_AVX2)
18591 break;
18593 /* FALLTHRU. */
18594 case IX86_BUILTIN_BLENDVPD:
18595 /* blendvpd is under sse4.1 but pcmpgtq is under sse4.2;
18596 w/o sse4.2 it is veclowered to scalar operations and
18597 not combined back. */
18598 if (!TARGET_SSE4_2)
18599 break;
18600 /* FALLTHRU. */
18601 case IX86_BUILTIN_PBLENDVB128:
18602 case IX86_BUILTIN_BLENDVPS:
18603 gcc_assert (n_args == 3);
18604 arg0 = gimple_call_arg (stmt, 0);
18605 arg1 = gimple_call_arg (stmt, 1);
18606 arg2 = gimple_call_arg (stmt, 2);
18607 if (gimple_call_lhs (stmt))
18609 loc = gimple_location (stmt);
18610 tree type = TREE_TYPE (arg2);
18611 if (VECTOR_FLOAT_TYPE_P (type))
18613 tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
18614 ? intSI_type_node : intDI_type_node;
18615 type = get_same_sized_vectype (itype, type);
18617 else
18618 type = signed_type_for (type);
18619 arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
18620 tree zero_vec = build_zero_cst (type);
18621 tree cmp_type = truth_type_for (type);
18622 tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
18623 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
18624 g = gimple_build_assign (gimple_call_lhs (stmt),
18625 VEC_COND_EXPR, cmp,
18626 arg1, arg0);
18627 gimple_set_location (g, loc);
18628 gsi_replace (gsi, g, false);
18630 else
18631 gsi_replace (gsi, gimple_build_nop (), false);
18632 return true;
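/* For illustration, the blend lowering above turns a variable blend into a
   sign-bit select; a hypothetical GIMPLE sketch:

     x = __builtin_ia32_blendvps (a, b, c);

   becomes

     c' = VIEW_CONVERT_EXPR<vector(4) int>(c);
     m  = c' < { 0, 0, 0, 0 };
     x  = VEC_COND_EXPR <m, b, a>;

   i.e. elements of B are selected where the sign bit of C is set.  */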
18635 case IX86_BUILTIN_PCMPEQB128:
18636 case IX86_BUILTIN_PCMPEQW128:
18637 case IX86_BUILTIN_PCMPEQD128:
18638 case IX86_BUILTIN_PCMPEQQ:
18639 case IX86_BUILTIN_PCMPEQB256:
18640 case IX86_BUILTIN_PCMPEQW256:
18641 case IX86_BUILTIN_PCMPEQD256:
18642 case IX86_BUILTIN_PCMPEQQ256:
18643 tcode = EQ_EXPR;
18644 goto do_cmp;
18646 case IX86_BUILTIN_PCMPGTB128:
18647 case IX86_BUILTIN_PCMPGTW128:
18648 case IX86_BUILTIN_PCMPGTD128:
18649 case IX86_BUILTIN_PCMPGTQ:
18650 case IX86_BUILTIN_PCMPGTB256:
18651 case IX86_BUILTIN_PCMPGTW256:
18652 case IX86_BUILTIN_PCMPGTD256:
18653 case IX86_BUILTIN_PCMPGTQ256:
18654 tcode = GT_EXPR;
18656 do_cmp:
18657 gcc_assert (n_args == 2);
18658 arg0 = gimple_call_arg (stmt, 0);
18659 arg1 = gimple_call_arg (stmt, 1);
18660 if (gimple_call_lhs (stmt))
18662 loc = gimple_location (stmt);
18663 tree type = TREE_TYPE (arg0);
18664 tree zero_vec = build_zero_cst (type);
18665 tree minus_one_vec = build_minus_one_cst (type);
18666 tree cmp_type = truth_type_for (type);
18667 tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
18668 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
18669 g = gimple_build_assign (gimple_call_lhs (stmt),
18670 VEC_COND_EXPR, cmp,
18671 minus_one_vec, zero_vec);
18672 gimple_set_location (g, loc);
18673 gsi_replace (gsi, g, false);
18675 else
18676 gsi_replace (gsi, gimple_build_nop (), false);
18677 return true;
18679 case IX86_BUILTIN_PSLLD:
18680 case IX86_BUILTIN_PSLLD128:
18681 case IX86_BUILTIN_PSLLD128_MASK:
18682 case IX86_BUILTIN_PSLLD256:
18683 case IX86_BUILTIN_PSLLD256_MASK:
18684 case IX86_BUILTIN_PSLLD512:
18685 case IX86_BUILTIN_PSLLDI:
18686 case IX86_BUILTIN_PSLLDI128:
18687 case IX86_BUILTIN_PSLLDI128_MASK:
18688 case IX86_BUILTIN_PSLLDI256:
18689 case IX86_BUILTIN_PSLLDI256_MASK:
18690 case IX86_BUILTIN_PSLLDI512:
18691 case IX86_BUILTIN_PSLLQ:
18692 case IX86_BUILTIN_PSLLQ128:
18693 case IX86_BUILTIN_PSLLQ128_MASK:
18694 case IX86_BUILTIN_PSLLQ256:
18695 case IX86_BUILTIN_PSLLQ256_MASK:
18696 case IX86_BUILTIN_PSLLQ512:
18697 case IX86_BUILTIN_PSLLQI:
18698 case IX86_BUILTIN_PSLLQI128:
18699 case IX86_BUILTIN_PSLLQI128_MASK:
18700 case IX86_BUILTIN_PSLLQI256:
18701 case IX86_BUILTIN_PSLLQI256_MASK:
18702 case IX86_BUILTIN_PSLLQI512:
18703 case IX86_BUILTIN_PSLLW:
18704 case IX86_BUILTIN_PSLLW128:
18705 case IX86_BUILTIN_PSLLW128_MASK:
18706 case IX86_BUILTIN_PSLLW256:
18707 case IX86_BUILTIN_PSLLW256_MASK:
18708 case IX86_BUILTIN_PSLLW512_MASK:
18709 case IX86_BUILTIN_PSLLWI:
18710 case IX86_BUILTIN_PSLLWI128:
18711 case IX86_BUILTIN_PSLLWI128_MASK:
18712 case IX86_BUILTIN_PSLLWI256:
18713 case IX86_BUILTIN_PSLLWI256_MASK:
18714 case IX86_BUILTIN_PSLLWI512_MASK:
18715 rcode = ASHIFT;
18716 is_vshift = false;
18717 goto do_shift;
18718 case IX86_BUILTIN_PSRAD:
18719 case IX86_BUILTIN_PSRAD128:
18720 case IX86_BUILTIN_PSRAD128_MASK:
18721 case IX86_BUILTIN_PSRAD256:
18722 case IX86_BUILTIN_PSRAD256_MASK:
18723 case IX86_BUILTIN_PSRAD512:
18724 case IX86_BUILTIN_PSRADI:
18725 case IX86_BUILTIN_PSRADI128:
18726 case IX86_BUILTIN_PSRADI128_MASK:
18727 case IX86_BUILTIN_PSRADI256:
18728 case IX86_BUILTIN_PSRADI256_MASK:
18729 case IX86_BUILTIN_PSRADI512:
18730 case IX86_BUILTIN_PSRAQ128_MASK:
18731 case IX86_BUILTIN_PSRAQ256_MASK:
18732 case IX86_BUILTIN_PSRAQ512:
18733 case IX86_BUILTIN_PSRAQI128_MASK:
18734 case IX86_BUILTIN_PSRAQI256_MASK:
18735 case IX86_BUILTIN_PSRAQI512:
18736 case IX86_BUILTIN_PSRAW:
18737 case IX86_BUILTIN_PSRAW128:
18738 case IX86_BUILTIN_PSRAW128_MASK:
18739 case IX86_BUILTIN_PSRAW256:
18740 case IX86_BUILTIN_PSRAW256_MASK:
18741 case IX86_BUILTIN_PSRAW512:
18742 case IX86_BUILTIN_PSRAWI:
18743 case IX86_BUILTIN_PSRAWI128:
18744 case IX86_BUILTIN_PSRAWI128_MASK:
18745 case IX86_BUILTIN_PSRAWI256:
18746 case IX86_BUILTIN_PSRAWI256_MASK:
18747 case IX86_BUILTIN_PSRAWI512:
18748 rcode = ASHIFTRT;
18749 is_vshift = false;
18750 goto do_shift;
18751 case IX86_BUILTIN_PSRLD:
18752 case IX86_BUILTIN_PSRLD128:
18753 case IX86_BUILTIN_PSRLD128_MASK:
18754 case IX86_BUILTIN_PSRLD256:
18755 case IX86_BUILTIN_PSRLD256_MASK:
18756 case IX86_BUILTIN_PSRLD512:
18757 case IX86_BUILTIN_PSRLDI:
18758 case IX86_BUILTIN_PSRLDI128:
18759 case IX86_BUILTIN_PSRLDI128_MASK:
18760 case IX86_BUILTIN_PSRLDI256:
18761 case IX86_BUILTIN_PSRLDI256_MASK:
18762 case IX86_BUILTIN_PSRLDI512:
18763 case IX86_BUILTIN_PSRLQ:
18764 case IX86_BUILTIN_PSRLQ128:
18765 case IX86_BUILTIN_PSRLQ128_MASK:
18766 case IX86_BUILTIN_PSRLQ256:
18767 case IX86_BUILTIN_PSRLQ256_MASK:
18768 case IX86_BUILTIN_PSRLQ512:
18769 case IX86_BUILTIN_PSRLQI:
18770 case IX86_BUILTIN_PSRLQI128:
18771 case IX86_BUILTIN_PSRLQI128_MASK:
18772 case IX86_BUILTIN_PSRLQI256:
18773 case IX86_BUILTIN_PSRLQI256_MASK:
18774 case IX86_BUILTIN_PSRLQI512:
18775 case IX86_BUILTIN_PSRLW:
18776 case IX86_BUILTIN_PSRLW128:
18777 case IX86_BUILTIN_PSRLW128_MASK:
18778 case IX86_BUILTIN_PSRLW256:
18779 case IX86_BUILTIN_PSRLW256_MASK:
18780 case IX86_BUILTIN_PSRLW512:
18781 case IX86_BUILTIN_PSRLWI:
18782 case IX86_BUILTIN_PSRLWI128:
18783 case IX86_BUILTIN_PSRLWI128_MASK:
18784 case IX86_BUILTIN_PSRLWI256:
18785 case IX86_BUILTIN_PSRLWI256_MASK:
18786 case IX86_BUILTIN_PSRLWI512:
18787 rcode = LSHIFTRT;
18788 is_vshift = false;
18789 goto do_shift;
18790 case IX86_BUILTIN_PSLLVV16HI:
18791 case IX86_BUILTIN_PSLLVV16SI:
18792 case IX86_BUILTIN_PSLLVV2DI:
18793 case IX86_BUILTIN_PSLLVV2DI_MASK:
18794 case IX86_BUILTIN_PSLLVV32HI:
18795 case IX86_BUILTIN_PSLLVV4DI:
18796 case IX86_BUILTIN_PSLLVV4DI_MASK:
18797 case IX86_BUILTIN_PSLLVV4SI:
18798 case IX86_BUILTIN_PSLLVV4SI_MASK:
18799 case IX86_BUILTIN_PSLLVV8DI:
18800 case IX86_BUILTIN_PSLLVV8HI:
18801 case IX86_BUILTIN_PSLLVV8SI:
18802 case IX86_BUILTIN_PSLLVV8SI_MASK:
18803 rcode = ASHIFT;
18804 is_vshift = true;
18805 goto do_shift;
18806 case IX86_BUILTIN_PSRAVQ128:
18807 case IX86_BUILTIN_PSRAVQ256:
18808 case IX86_BUILTIN_PSRAVV16HI:
18809 case IX86_BUILTIN_PSRAVV16SI:
18810 case IX86_BUILTIN_PSRAVV32HI:
18811 case IX86_BUILTIN_PSRAVV4SI:
18812 case IX86_BUILTIN_PSRAVV4SI_MASK:
18813 case IX86_BUILTIN_PSRAVV8DI:
18814 case IX86_BUILTIN_PSRAVV8HI:
18815 case IX86_BUILTIN_PSRAVV8SI:
18816 case IX86_BUILTIN_PSRAVV8SI_MASK:
18817 rcode = ASHIFTRT;
18818 is_vshift = true;
18819 goto do_shift;
18820 case IX86_BUILTIN_PSRLVV16HI:
18821 case IX86_BUILTIN_PSRLVV16SI:
18822 case IX86_BUILTIN_PSRLVV2DI:
18823 case IX86_BUILTIN_PSRLVV2DI_MASK:
18824 case IX86_BUILTIN_PSRLVV32HI:
18825 case IX86_BUILTIN_PSRLVV4DI:
18826 case IX86_BUILTIN_PSRLVV4DI_MASK:
18827 case IX86_BUILTIN_PSRLVV4SI:
18828 case IX86_BUILTIN_PSRLVV4SI_MASK:
18829 case IX86_BUILTIN_PSRLVV8DI:
18830 case IX86_BUILTIN_PSRLVV8HI:
18831 case IX86_BUILTIN_PSRLVV8SI:
18832 case IX86_BUILTIN_PSRLVV8SI_MASK:
18833 rcode = LSHIFTRT;
18834 is_vshift = true;
18835 goto do_shift;
18837 do_shift:
18838 gcc_assert (n_args >= 2);
18839 if (!gimple_call_lhs (stmt))
18840 break;
18841 arg0 = gimple_call_arg (stmt, 0);
18842 arg1 = gimple_call_arg (stmt, 1);
18843 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
18844 /* For masked shift, only optimize if the mask is all ones. */
18845 if (n_args > 2
18846 && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
18847 break;
18848 if (is_vshift)
18850 if (TREE_CODE (arg1) != VECTOR_CST)
18851 break;
18852 count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
18853 if (integer_zerop (arg1))
18854 count = 0;
18855 else if (rcode == ASHIFTRT)
18856 break;
18857 else
18858 for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
18860 tree elt = VECTOR_CST_ELT (arg1, i);
18861 if (!wi::neg_p (wi::to_wide (elt))
18862 && wi::to_widest (elt) < count)
18863 return false;
18866 else
18868 arg1 = ix86_vector_shift_count (arg1);
18869 if (!arg1)
18870 break;
18871 count = tree_to_uhwi (arg1);
18873 if (count == 0)
18875 /* Just return the first argument for shift by 0. */
18876 loc = gimple_location (stmt);
18877 g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
18878 gimple_set_location (g, loc);
18879 gsi_replace (gsi, g, false);
18880 return true;
18882 if (rcode != ASHIFTRT
18883 && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
18885 /* For shift counts equal to or greater than the precision, the
18886 result is zero, except for arithmetic right shift. */
18887 loc = gimple_location (stmt);
18888 g = gimple_build_assign (gimple_call_lhs (stmt),
18889 build_zero_cst (TREE_TYPE (arg0)));
18890 gimple_set_location (g, loc);
18891 gsi_replace (gsi, g, false);
18892 return true;
18894 break;
18896 case IX86_BUILTIN_SHUFPD512:
18897 case IX86_BUILTIN_SHUFPS512:
18898 case IX86_BUILTIN_SHUFPD:
18899 case IX86_BUILTIN_SHUFPD256:
18900 case IX86_BUILTIN_SHUFPS:
18901 case IX86_BUILTIN_SHUFPS256:
18902 arg0 = gimple_call_arg (stmt, 0);
18903 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
18904 /* This is a masked shuffle. Only optimize if the mask is all ones. */
18905 if (n_args > 3
18906 && !ix86_masked_all_ones (elems,
18907 gimple_call_arg (stmt, n_args - 1)))
18908 break;
18909 arg2 = gimple_call_arg (stmt, 2);
18910 if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (stmt))
18912 unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2);
18913 /* Check for a valid imm; refer to gcc.target/i386/testimm-10.c. */
18914 if (shuffle_mask > 255)
18915 return false;
18917 machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
18918 loc = gimple_location (stmt);
18919 tree itype = (imode == E_DFmode
18920 ? long_long_integer_type_node : integer_type_node);
18921 tree vtype = build_vector_type (itype, elems);
18922 tree_vector_builder elts (vtype, elems, 1);
18925 /* Transform the integer shuffle_mask into the vector perm_mask used
18926 by vec_perm_expr; refer to shufp[sd]256/512 in sse.md. */
18927 for (unsigned i = 0; i != elems; i++)
18929 unsigned sel_idx;
18930 /* Imm[1:0] (if VL > 128, then also Imm[3:2], Imm[5:4], Imm[7:6])
18931 provides 2 select controls for each element of the
18932 destination. */
18933 if (imode == E_DFmode)
18934 sel_idx = (i & 1) * elems + (i & ~1)
18935 + ((shuffle_mask >> i) & 1);
18936 else
18938 /* Imm[7:0] (if VL > 128, Imm[7:0] is reused) provides 4 select
18939 controls for each element of the destination. */
18940 unsigned j = i % 4;
18941 sel_idx = ((i >> 1) & 1) * elems + (i & ~3)
18942 + ((shuffle_mask >> 2 * j) & 3);
18944 elts.quick_push (build_int_cst (itype, sel_idx));
18947 tree perm_mask = elts.build ();
18948 arg1 = gimple_call_arg (stmt, 1);
18949 g = gimple_build_assign (gimple_call_lhs (stmt),
18950 VEC_PERM_EXPR,
18951 arg0, arg1, perm_mask);
18952 gimple_set_location (g, loc);
18953 gsi_replace (gsi, g, false);
18954 return true;
18956 // Do not error yet, the constant could be propagated later?
18957 break;
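/* For illustration, the shufps rewrite above for V4SFmode with
   shuffle_mask 0x1b (fields m0=3, m1=2, m2=1, m3=0) builds the selector
   { 3, 2, 5, 4 }, i.e.

     x = VEC_PERM_EXPR <a, b, { 3, 2, 5, 4 }>;

   which takes elements 3 and 2 from the first operand and elements 1 and 0
   from the second, matching the shufps semantics.  */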
18959 case IX86_BUILTIN_PABSB:
18960 case IX86_BUILTIN_PABSW:
18961 case IX86_BUILTIN_PABSD:
18962 /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */
18963 if (!TARGET_MMX_WITH_SSE)
18964 break;
18965 /* FALLTHRU. */
18966 case IX86_BUILTIN_PABSB128:
18967 case IX86_BUILTIN_PABSB256:
18968 case IX86_BUILTIN_PABSB512:
18969 case IX86_BUILTIN_PABSW128:
18970 case IX86_BUILTIN_PABSW256:
18971 case IX86_BUILTIN_PABSW512:
18972 case IX86_BUILTIN_PABSD128:
18973 case IX86_BUILTIN_PABSD256:
18974 case IX86_BUILTIN_PABSD512:
18975 case IX86_BUILTIN_PABSQ128:
18976 case IX86_BUILTIN_PABSQ256:
18977 case IX86_BUILTIN_PABSQ512:
18978 case IX86_BUILTIN_PABSB128_MASK:
18979 case IX86_BUILTIN_PABSB256_MASK:
18980 case IX86_BUILTIN_PABSW128_MASK:
18981 case IX86_BUILTIN_PABSW256_MASK:
18982 case IX86_BUILTIN_PABSD128_MASK:
18983 case IX86_BUILTIN_PABSD256_MASK:
18984 gcc_assert (n_args >= 1);
18985 if (!gimple_call_lhs (stmt))
18986 break;
18987 arg0 = gimple_call_arg (stmt, 0);
18988 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
18989 /* For masked ABS, only optimize if the mask is all ones. */
18990 if (n_args > 1
18991 && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
18992 break;
18994 tree utype, ures, vce;
18995 utype = unsigned_type_for (TREE_TYPE (arg0));
18996 /* PABSB/W/D/Q store the unsigned result in dst, so use ABSU_EXPR
18997 instead of ABS_EXPR to handle the overflow case (TYPE_MIN). */
18998 ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
18999 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
19000 loc = gimple_location (stmt);
19001 vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
19002 g = gimple_build_assign (gimple_call_lhs (stmt),
19003 VIEW_CONVERT_EXPR, vce);
19004 gsi_replace (gsi, g, false);
19006 return true;
19008 default:
19009 break;
19012 return false;
19015 /* Handler for an SVML-style interface to
19016 a library with vectorized intrinsics. */
19018 tree
19019 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
19021 char name[20];
19022 tree fntype, new_fndecl, args;
19023 unsigned arity;
19024 const char *bname;
19025 machine_mode el_mode, in_mode;
19026 int n, in_n;
19028 /* SVML is suitable for unsafe math only. */
19029 if (!flag_unsafe_math_optimizations)
19030 return NULL_TREE;
19032 el_mode = TYPE_MODE (TREE_TYPE (type_out));
19033 n = TYPE_VECTOR_SUBPARTS (type_out);
19034 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19035 in_n = TYPE_VECTOR_SUBPARTS (type_in);
19036 if (el_mode != in_mode
19037 || n != in_n)
19038 return NULL_TREE;
19040 switch (fn)
19042 CASE_CFN_EXP:
19043 CASE_CFN_LOG:
19044 CASE_CFN_LOG10:
19045 CASE_CFN_POW:
19046 CASE_CFN_TANH:
19047 CASE_CFN_TAN:
19048 CASE_CFN_ATAN:
19049 CASE_CFN_ATAN2:
19050 CASE_CFN_ATANH:
19051 CASE_CFN_CBRT:
19052 CASE_CFN_SINH:
19053 CASE_CFN_SIN:
19054 CASE_CFN_ASINH:
19055 CASE_CFN_ASIN:
19056 CASE_CFN_COSH:
19057 CASE_CFN_COS:
19058 CASE_CFN_ACOSH:
19059 CASE_CFN_ACOS:
19060 if ((el_mode != DFmode || n != 2)
19061 && (el_mode != SFmode || n != 4))
19062 return NULL_TREE;
19063 break;
19065 default:
19066 return NULL_TREE;
19069 tree fndecl = mathfn_built_in (el_mode == DFmode
19070 ? double_type_node : float_type_node, fn);
19071 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
19073 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
19074 strcpy (name, "vmlsLn4");
19075 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
19076 strcpy (name, "vmldLn2");
19077 else if (n == 4)
19079 sprintf (name, "vmls%s", bname+10);
19080 name[strlen (name)-1] = '4';
19082 else
19083 sprintf (name, "vmld%s2", bname+10);
19085 /* Convert to uppercase. */
19086 name[4] &= ~0x20;
19088 arity = 0;
19089 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
19090 arity++;
19092 if (arity == 1)
19093 fntype = build_function_type_list (type_out, type_in, NULL);
19094 else
19095 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
19097 /* Build a function declaration for the vectorized function. */
19098 new_fndecl = build_decl (BUILTINS_LOCATION,
19099 FUNCTION_DECL, get_identifier (name), fntype);
19100 TREE_PUBLIC (new_fndecl) = 1;
19101 DECL_EXTERNAL (new_fndecl) = 1;
19102 DECL_IS_NOVOPS (new_fndecl) = 1;
19103 TREE_READONLY (new_fndecl) = 1;
19105 return new_fndecl;
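/* For illustration, the name mangling above produces the SVML entry
   points: for a 2-lane double sin the base name "__builtin_sin" becomes
   "vmldSin2", for a 4-lane float sin "__builtin_sinf" becomes "vmlsSin4",
   and log is special-cased to "vmldLn2" / "vmlsLn4".  */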
19108 /* Handler for an ACML-style interface to
19109 a library with vectorized intrinsics. */
19111 tree
19112 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
19114 char name[20] = "__vr.._";
19115 tree fntype, new_fndecl, args;
19116 unsigned arity;
19117 const char *bname;
19118 machine_mode el_mode, in_mode;
19119 int n, in_n;
19121 /* ACML is 64-bit only and suitable for unsafe math only, as it
19122 does not correctly support parts of IEEE with the required
19123 precision, such as denormals. */
19124 if (!TARGET_64BIT
19125 || !flag_unsafe_math_optimizations)
19126 return NULL_TREE;
19128 el_mode = TYPE_MODE (TREE_TYPE (type_out));
19129 n = TYPE_VECTOR_SUBPARTS (type_out);
19130 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19131 in_n = TYPE_VECTOR_SUBPARTS (type_in);
19132 if (el_mode != in_mode
19133 || n != in_n)
19134 return NULL_TREE;
19136 switch (fn)
19138 CASE_CFN_SIN:
19139 CASE_CFN_COS:
19140 CASE_CFN_EXP:
19141 CASE_CFN_LOG:
19142 CASE_CFN_LOG2:
19143 CASE_CFN_LOG10:
19144 if (el_mode == DFmode && n == 2)
19146 name[4] = 'd';
19147 name[5] = '2';
19149 else if (el_mode == SFmode && n == 4)
19151 name[4] = 's';
19152 name[5] = '4';
19154 else
19155 return NULL_TREE;
19156 break;
19158 default:
19159 return NULL_TREE;
19162 tree fndecl = mathfn_built_in (el_mode == DFmode
19163 ? double_type_node : float_type_node, fn);
19164 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
19165 sprintf (name + 7, "%s", bname+10);
19167 arity = 0;
19168 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
19169 arity++;
19171 if (arity == 1)
19172 fntype = build_function_type_list (type_out, type_in, NULL);
19173 else
19174 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
19176 /* Build a function declaration for the vectorized function. */
19177 new_fndecl = build_decl (BUILTINS_LOCATION,
19178 FUNCTION_DECL, get_identifier (name), fntype);
19179 TREE_PUBLIC (new_fndecl) = 1;
19180 DECL_EXTERNAL (new_fndecl) = 1;
19181 DECL_IS_NOVOPS (new_fndecl) = 1;
19182 TREE_READONLY (new_fndecl) = 1;
19184 return new_fndecl;
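/* For illustration, the template above yields the ACML vector routine
   names: a 2-lane double sin becomes "__vrd2_sin" and a 4-lane float sin
   becomes "__vrs4_sinf".  */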
19187 /* Returns a decl of a function that implements scatter store with
19188 register type VECTYPE and index type INDEX_TYPE and SCALE.
19189 Return NULL_TREE if it is not available. */
19191 static tree
19192 ix86_vectorize_builtin_scatter (const_tree vectype,
19193 const_tree index_type, int scale)
19195 bool si;
19196 enum ix86_builtins code;
19198 if (!TARGET_AVX512F)
19199 return NULL_TREE;
19201 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
19202 ? !TARGET_USE_SCATTER_2PARTS
19203 : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
19204 ? !TARGET_USE_SCATTER_4PARTS
19205 : !TARGET_USE_SCATTER_8PARTS))
19206 return NULL_TREE;
19208 if ((TREE_CODE (index_type) != INTEGER_TYPE
19209 && !POINTER_TYPE_P (index_type))
19210 || (TYPE_MODE (index_type) != SImode
19211 && TYPE_MODE (index_type) != DImode))
19212 return NULL_TREE;
19214 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
19215 return NULL_TREE;
19217 /* The v*scatter* insns sign-extend the index to pointer mode. */
19218 if (TYPE_PRECISION (index_type) < POINTER_SIZE
19219 && TYPE_UNSIGNED (index_type))
19220 return NULL_TREE;
19222 /* Scale can be 1, 2, 4 or 8. */
19223 if (scale <= 0
19224 || scale > 8
19225 || (scale & (scale - 1)) != 0)
19226 return NULL_TREE;
19228 si = TYPE_MODE (index_type) == SImode;
19229 switch (TYPE_MODE (vectype))
19231 case E_V8DFmode:
19232 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
19233 break;
19234 case E_V8DImode:
19235 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
19236 break;
19237 case E_V16SFmode:
19238 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
19239 break;
19240 case E_V16SImode:
19241 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
19242 break;
19243 case E_V4DFmode:
19244 if (TARGET_AVX512VL)
19245 code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
19246 else
19247 return NULL_TREE;
19248 break;
19249 case E_V4DImode:
19250 if (TARGET_AVX512VL)
19251 code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
19252 else
19253 return NULL_TREE;
19254 break;
19255 case E_V8SFmode:
19256 if (TARGET_AVX512VL)
19257 code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
19258 else
19259 return NULL_TREE;
19260 break;
19261 case E_V8SImode:
19262 if (TARGET_AVX512VL)
19263 code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
19264 else
19265 return NULL_TREE;
19266 break;
19267 case E_V2DFmode:
19268 if (TARGET_AVX512VL)
19269 code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
19270 else
19271 return NULL_TREE;
19272 break;
19273 case E_V2DImode:
19274 if (TARGET_AVX512VL)
19275 code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
19276 else
19277 return NULL_TREE;
19278 break;
19279 case E_V4SFmode:
19280 if (TARGET_AVX512VL)
19281 code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
19282 else
19283 return NULL_TREE;
19284 break;
19285 case E_V4SImode:
19286 if (TARGET_AVX512VL)
19287 code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
19288 else
19289 return NULL_TREE;
19290 break;
19291 default:
19292 return NULL_TREE;
19295 return get_ix86_builtin (code);
19298 /* Return true if it is safe to use the rsqrt optabs to optimize
19299 1.0/sqrt. */
19301 static bool
19302 use_rsqrt_p (machine_mode mode)
19304 return ((mode == HFmode
19305 || (TARGET_SSE && TARGET_SSE_MATH))
19306 && flag_finite_math_only
19307 && !flag_trapping_math
19308 && flag_unsafe_math_optimizations);
19311 /* Helper for avx_vpermilps256_operand et al. This is also used by
19312 the expansion functions to turn the parallel back into a mask.
19313 The return value is 0 for no match and the imm8+1 for a match. */
19316 avx_vpermilp_parallel (rtx par, machine_mode mode)
19318 unsigned i, nelt = GET_MODE_NUNITS (mode);
19319 unsigned mask = 0;
19320 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
19322 if (XVECLEN (par, 0) != (int) nelt)
19323 return 0;
19325 /* Validate that all of the elements are constants, and not totally
19326 out of range. Copy the data into an integral array to make the
19327 subsequent checks easier. */
19328 for (i = 0; i < nelt; ++i)
19330 rtx er = XVECEXP (par, 0, i);
19331 unsigned HOST_WIDE_INT ei;
19333 if (!CONST_INT_P (er))
19334 return 0;
19335 ei = INTVAL (er);
19336 if (ei >= nelt)
19337 return 0;
19338 ipar[i] = ei;
19341 switch (mode)
19343 case E_V8DFmode:
19344 /* In the 512-bit DFmode case, we can only move elements within
19345 a 128-bit lane. First fill the second part of the mask,
19346 then fallthru. */
19347 for (i = 4; i < 6; ++i)
19349 if (ipar[i] < 4 || ipar[i] >= 6)
19350 return 0;
19351 mask |= (ipar[i] - 4) << i;
19353 for (i = 6; i < 8; ++i)
19355 if (ipar[i] < 6)
19356 return 0;
19357 mask |= (ipar[i] - 6) << i;
19359 /* FALLTHRU */
19361 case E_V4DFmode:
19362 /* In the 256-bit DFmode case, we can only move elements within
19363 a 128-bit lane. */
19364 for (i = 0; i < 2; ++i)
19366 if (ipar[i] >= 2)
19367 return 0;
19368 mask |= ipar[i] << i;
19370 for (i = 2; i < 4; ++i)
19372 if (ipar[i] < 2)
19373 return 0;
19374 mask |= (ipar[i] - 2) << i;
19376 break;
19378 case E_V16SFmode:
19379 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
19380 must mirror the permutation in the lower 256 bits. */
19381 for (i = 0; i < 8; ++i)
19382 if (ipar[i] + 8 != ipar[i + 8])
19383 return 0;
19384 /* FALLTHRU */
19386 case E_V8SFmode:
19387 /* In the 256-bit SFmode case, we have full freedom of
19388 movement within the low 128-bit lane, but the high 128-bit
19389 lane must mirror the exact same pattern. */
19390 for (i = 0; i < 4; ++i)
19391 if (ipar[i] + 4 != ipar[i + 4])
19392 return 0;
19393 nelt = 4;
19394 /* FALLTHRU */
19396 case E_V2DFmode:
19397 case E_V4SFmode:
19398 /* In the 128-bit case, we have full freedom in the placement of
19399 the elements from the source operand. */
19400 for (i = 0; i < nelt; ++i)
19401 mask |= ipar[i] << (i * (nelt / 2));
19402 break;
19404 default:
19405 gcc_unreachable ();
19408 /* Make sure success has a non-zero value by adding one. */
19409 return mask + 1;
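/* For illustration: for V4DFmode, a parallel selecting (1, 0, 3, 2) swaps
   the elements within each 128-bit lane; the loop above reconstructs the
   imm8 0b0101 and the function returns 0b0101 + 1 = 6.  */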
19412 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
19413 the expansion functions to turn the parallel back into a mask.
19414 The return value is 0 for no match and the imm8+1 for a match. */
19417 avx_vperm2f128_parallel (rtx par, machine_mode mode)
19419 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
19420 unsigned mask = 0;
19421 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
19423 if (XVECLEN (par, 0) != (int) nelt)
19424 return 0;
19426 /* Validate that all of the elements are constants, and not totally
19427 out of range. Copy the data into an integral array to make the
19428 subsequent checks easier. */
19429 for (i = 0; i < nelt; ++i)
19431 rtx er = XVECEXP (par, 0, i);
19432 unsigned HOST_WIDE_INT ei;
19434 if (!CONST_INT_P (er))
19435 return 0;
19436 ei = INTVAL (er);
19437 if (ei >= 2 * nelt)
19438 return 0;
19439 ipar[i] = ei;
19442 /* Validate that each half of the permute is a run of consecutive elements. */
19443 for (i = 0; i < nelt2 - 1; ++i)
19444 if (ipar[i] + 1 != ipar[i + 1])
19445 return 0;
19446 for (i = nelt2; i < nelt - 1; ++i)
19447 if (ipar[i] + 1 != ipar[i + 1])
19448 return 0;
19450 /* Reconstruct the mask. */
19451 for (i = 0; i < 2; ++i)
19453 unsigned e = ipar[i * nelt2];
19454 if (e % nelt2)
19455 return 0;
19456 e /= nelt2;
19457 mask |= e << (i * 4);
19460 /* Make sure success has a non-zero value by adding one. */
19461 return mask + 1;
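/* For illustration: for V8SFmode, a parallel selecting (8, 9, 10, 11,
   0, 1, 2, 3) puts the low lane of operand 2 into the low half of the
   result and the low lane of operand 1 into the high half; the
   reconstructed imm8 is 0x02 and the function returns 0x03.  */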
19464 /* Return a mask of VPTERNLOG operands that do not affect output. */
19467 vpternlog_redundant_operand_mask (rtx pternlog_imm)
19469 int mask = 0;
19470 int imm8 = INTVAL (pternlog_imm);
19472 if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F))
19473 mask |= 1;
19474 if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
19475 mask |= 2;
19476 if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
19477 mask |= 4;
19479 return mask;
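/* For illustration: the imm8 is the 8-entry truth table of the three
   operands (A = 0xf0, B = 0xcc, C = 0xaa), so an operand is redundant when
   the table does not depend on it. E.g. for imm8 0xf0 (copy the first
   operand) the checks above yield a mask of 2 | 4 = 6, marking the second
   and third operands as redundant.  */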
19482 /* Eliminate false dependencies on operands that do not affect output
19483 by substituting other operands of a VPTERNLOG. */
19485 void
19486 substitute_vpternlog_operands (rtx *operands)
19488 int mask = vpternlog_redundant_operand_mask (operands[4]);
19490 if (mask & 1) /* The first operand is redundant. */
19491 operands[1] = operands[2];
19493 if (mask & 2) /* The second operand is redundant. */
19494 operands[2] = operands[1];
19496 if (mask & 4) /* The third operand is redundant. */
19497 operands[3] = operands[1];
19498 else if (REG_P (operands[3]))
19500 if (mask & 1)
19501 operands[1] = operands[3];
19502 if (mask & 2)
19503 operands[2] = operands[3];
19507 /* Return a register priority for hard reg REGNO. */
19508 static int
19509 ix86_register_priority (int hard_regno)
19511 /* ebp and r13 as the base always want a displacement, and r12 as the
19512 base always wants an index. So discourage their use in an
19513 address. */
19514 if (hard_regno == R12_REG || hard_regno == R13_REG)
19515 return 0;
19516 if (hard_regno == BP_REG)
19517 return 1;
19518 /* New x86-64 int registers result in bigger code size. Discourage them. */
19519 if (REX_INT_REGNO_P (hard_regno))
19520 return 2;
19521 /* New x86-64 SSE registers result in bigger code size. Discourage them. */
19522 if (REX_SSE_REGNO_P (hard_regno))
19523 return 2;
19524 if (EXT_REX_SSE_REGNO_P (hard_regno))
19525 return 1;
19526 /* Usage of AX register results in smaller code. Prefer it. */
19527 if (hard_regno == AX_REG)
19528 return 4;
19529 return 3;
19532 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
19534 Put float CONST_DOUBLE in the constant pool instead of fp regs.
19535 QImode must go into class Q_REGS.
19536 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
19537 movdf to do mem-to-mem moves through integer regs. */
19539 static reg_class_t
19540 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
19542 machine_mode mode = GET_MODE (x);
19544 /* We're only allowed to return a subclass of CLASS. Many of the
19545 following checks fail for NO_REGS, so eliminate that early. */
19546 if (regclass == NO_REGS)
19547 return NO_REGS;
19549 /* All classes can load zeros. */
19550 if (x == CONST0_RTX (mode))
19551 return regclass;
19553 /* Force constants into memory if we are loading a (nonzero) constant into
19554 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
19555 instructions to load from a constant. */
19556 if (CONSTANT_P (x)
19557 && (MAYBE_MMX_CLASS_P (regclass)
19558 || MAYBE_SSE_CLASS_P (regclass)
19559 || MAYBE_MASK_CLASS_P (regclass)))
19560 return NO_REGS;
19562 /* Floating-point constants need more complex checks. */
19563 if (CONST_DOUBLE_P (x))
19565 /* General regs can load everything. */
19566 if (INTEGER_CLASS_P (regclass))
19567 return regclass;
19569 /* Floats can load 0 and 1 plus some others. Note that we eliminated
19570 zero above. We only want to wind up preferring 80387 registers if
19571 we plan on doing computation with them. */
19572 if (IS_STACK_MODE (mode)
19573 && standard_80387_constant_p (x) > 0)
19575 /* Limit class to FP regs. */
19576 if (FLOAT_CLASS_P (regclass))
19577 return FLOAT_REGS;
19580 return NO_REGS;
19583 /* Prefer SSE if we can use them for math. Also allow integer regs
19584 when moves between register units are cheap. */
19585 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19587 if (TARGET_INTER_UNIT_MOVES_FROM_VEC
19588 && TARGET_INTER_UNIT_MOVES_TO_VEC
19589 && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
19590 return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
19591 else
19592 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
19595 /* Generally when we see PLUS here, it's the function invariant
19596 (plus soft-fp const_int), which can only be computed into general
19597 regs. */
19598 if (GET_CODE (x) == PLUS)
19599 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
19601 /* QImode constants are easy to load, but non-constant QImode data
19602 must go into Q_REGS or ALL_MASK_REGS. */
19603 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
19605 if (Q_CLASS_P (regclass))
19606 return regclass;
19607 else if (reg_class_subset_p (Q_REGS, regclass))
19608 return Q_REGS;
19609 else if (MASK_CLASS_P (regclass))
19610 return regclass;
19611 else
19612 return NO_REGS;
19615 return regclass;
19618 /* Discourage putting floating-point values in SSE registers unless
19619 SSE math is being used, and likewise for the 387 registers. */
19620 static reg_class_t
19621 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
19623 /* Restrict the output reload class to the register bank that we are doing
19624 math on. If we would like not to return a subset of CLASS, reject this
19625 alternative: if reload cannot do this, it will still use its choice. */
19626 machine_mode mode = GET_MODE (x);
19627 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19628 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
19630 if (IS_STACK_MODE (mode))
19631 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
19633 return regclass;
19636 static reg_class_t
19637 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
19638 machine_mode mode, secondary_reload_info *sri)
19640 /* Double-word spills from general registers to non-offsettable memory
19641 references (zero-extended addresses) require special handling. */
19642 if (TARGET_64BIT
19643 && MEM_P (x)
19644 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
19645 && INTEGER_CLASS_P (rclass)
19646 && !offsettable_memref_p (x))
19648 sri->icode = (in_p
19649 ? CODE_FOR_reload_noff_load
19650 : CODE_FOR_reload_noff_store);
19651 /* Add the cost of moving address to a temporary. */
19652 sri->extra_cost = 1;
19654 return NO_REGS;
19657 /* QImode spills from non-QI registers require an
19658 intermediate register on 32-bit targets. */
19659 if (mode == QImode
19660 && ((!TARGET_64BIT && !in_p
19661 && INTEGER_CLASS_P (rclass)
19662 && MAYBE_NON_Q_CLASS_P (rclass))
19663 || (!TARGET_AVX512DQ
19664 && MAYBE_MASK_CLASS_P (rclass))))
19666 int regno = true_regnum (x);
19668 /* Return Q_REGS if the operand is in memory. */
19669 if (regno == -1)
19670 return Q_REGS;
19672 return NO_REGS;
19675 /* Require a move to a GPR, and then a store to memory. */
19676 if ((mode == HFmode || mode == HImode || mode == V2QImode
19677 || mode == BFmode)
19678 && !TARGET_SSE4_1
19679 && SSE_CLASS_P (rclass)
19680 && !in_p && MEM_P (x))
19682 sri->extra_cost = 1;
19683 return GENERAL_REGS;
19686 /* This condition handles the corner case where an expression involving
19687 pointers gets vectorized. We're trying to use the address of a
19688 stack slot as a vector initializer.
19690 (set (reg:V2DI 74 [ vect_cst_.2 ])
19691 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
19693 Eventually frame gets turned into sp+offset like this:
19695 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
19696 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
19697 (const_int 392 [0x188]))))
19699 That later gets turned into:
19701 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
19702 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
19703 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
19705 We'll have the following reload recorded:
19707 Reload 0: reload_in (DI) =
19708 (plus:DI (reg/f:DI 7 sp)
19709 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
19710 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
19711 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
19712 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
19713 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
19714 reload_reg_rtx: (reg:V2DI 22 xmm1)
19716 Which isn't going to work since SSE instructions can't handle scalar
19717 additions. Returning GENERAL_REGS forces the addition into integer
19718 register and reload can handle subsequent reloads without problems. */
19720 if (in_p && GET_CODE (x) == PLUS
19721 && SSE_CLASS_P (rclass)
19722 && SCALAR_INT_MODE_P (mode))
19723 return GENERAL_REGS;
19725 return NO_REGS;
19728 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
19730 static bool
19731 ix86_class_likely_spilled_p (reg_class_t rclass)
19733 switch (rclass)
19735 case AREG:
19736 case DREG:
19737 case CREG:
19738 case BREG:
19739 case AD_REGS:
19740 case SIREG:
19741 case DIREG:
19742 case SSE_FIRST_REG:
19743 case FP_TOP_REG:
19744 case FP_SECOND_REG:
19745 return true;
19747 default:
19748 break;
19751 return false;
19754 /* Return true if a set of DST by the expression SRC should be allowed.
19755 This prevents complex sets of likely_spilled hard regs before reload. */
19757 bool
19758 ix86_hardreg_mov_ok (rtx dst, rtx src)
19760 /* Avoid complex sets of likely_spilled hard registers before reload. */
19761 if (REG_P (dst) && HARD_REGISTER_P (dst)
19762 && !REG_P (src) && !MEM_P (src)
19763 && !(VECTOR_MODE_P (GET_MODE (dst))
19764 ? standard_sse_constant_p (src, GET_MODE (dst))
19765 : x86_64_immediate_operand (src, GET_MODE (dst)))
19766 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
19767 && !reload_completed)
19768 return false;
19769 return true;
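/* A worked example of the check above (illustrative only; the regno/class
   pairing is an assumption):

     (set (reg:SI 0 ax) (plus:SI (reg:SI 100) (reg:SI 101)))

   Before reload this is rejected: AX lives in AREG, which
   ix86_class_likely_spilled_p reports as likely spilled, and the PLUS
   source is neither a register, a memory operand nor a valid immediate.
   The same set with a pseudo or CONST_INT source, or any set emitted
   after reload_completed, is accepted.  */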
19772 /* If we are copying between registers from different register sets
19773 (e.g. FP and integer), we may need a memory location.
19775 The function can't work reliably when one of the CLASSES is a class
19776 containing registers from multiple sets. We avoid this by never combining
19777 different sets in a single alternative in the machine description.
19778 Ensure that this constraint holds to avoid unexpected surprises.
19780 When STRICT is false, we are being called from REGISTER_MOVE_COST,
19781 so do not enforce these sanity checks.
19783 To optimize register_move_cost performance, define inline variant. */
19785 static inline bool
19786 inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
19787 reg_class_t class2, int strict)
19789 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
19790 return false;
19792 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
19793 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
19794 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
19795 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
19796 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
19797 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
19798 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
19799 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
19801 gcc_assert (!strict || lra_in_progress);
19802 return true;
19805 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
19806 return true;
19808 /* ??? This is a lie. We do have moves between mmx/general and between
19809 mmx/sse2. But by saying we need secondary memory we discourage the
19810 register allocator from using the mmx registers unless needed. */
19811 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
19812 return true;
19814 /* Between mask and general, we have moves no larger than word size. */
19815 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
19817 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
19818 || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19819 return true;
19822 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
19824 /* SSE1 doesn't have any direct moves from other classes. */
19825 if (!TARGET_SSE2)
19826 return true;
19828 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
19829 return true;
19831 int msize = GET_MODE_SIZE (mode);
19833 /* Between SSE and general, we have moves no larger than word size. */
19834 if (msize > UNITS_PER_WORD)
19835 return true;
19837 /* In addition to SImode moves, HImode moves are supported for SSE2 and above;
19838 use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16. */
19839 int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);
19841 if (msize < minsize)
19842 return true;
19844 /* If the target says that inter-unit moves are more expensive
19845 than moving through memory, then don't generate them. */
19846 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
19847 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
19848 return true;
19851 return false;
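/* For illustration, assuming TARGET_SSE2 and inter-unit moves enabled:
   an SImode copy between GENERAL_REGS and SSE_REGS needs no secondary
   memory (msize 4 is within [minsize 2, UNITS_PER_WORD]), a QImode copy
   does (msize 1 < minsize 2), and any copy between FLOAT_REGS and
   GENERAL_REGS always goes through memory.  */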
19854 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
19856 static bool
19857 ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
19858 reg_class_t class2)
19860 return inline_secondary_memory_needed (mode, class1, class2, true);
19863 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
19865 get_secondary_mem widens integral modes to BITS_PER_WORD.
19866 There is no need to emit a full 64-bit move on 64-bit targets
19867 for integral modes that can be moved using a 32-bit move. */
19869 static machine_mode
19870 ix86_secondary_memory_needed_mode (machine_mode mode)
19872 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
19873 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
19874 return mode;
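/* For illustration: QImode and HImode (integral, narrower than 32 bits)
   are widened to SImode here, so the memory slot used for the secondary
   reload is accessed with a 32-bit move; SFmode and wider integral modes
   are returned unchanged.  */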
19877 /* Implement the TARGET_CLASS_MAX_NREGS hook.
19879 On the 80386, this is the size of MODE in words,
19880 except in the FP regs, where a single reg is always enough. */
19882 static unsigned char
19883 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
19885 if (MAYBE_INTEGER_CLASS_P (rclass))
19887 if (mode == XFmode)
19888 return (TARGET_64BIT ? 2 : 3);
19889 else if (mode == XCmode)
19890 return (TARGET_64BIT ? 4 : 6);
19891 else
19892 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
19894 else
19896 if (COMPLEX_MODE_P (mode))
19897 return 2;
19898 else
19899 return 1;
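/* A few illustrative values: DImode in GENERAL_REGS takes CEIL (8, 4) = 2
   registers on ia32 and 1 on x86-64; XFmode takes 3 GPRs on ia32 and 2 on
   x86-64; in the FP/SSE classes a DCmode value takes 2 registers and any
   non-complex mode takes 1.  */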
19903 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
19905 static bool
19906 ix86_can_change_mode_class (machine_mode from, machine_mode to,
19907 reg_class_t regclass)
19909 if (from == to)
19910 return true;
19912 /* x87 registers can't do subreg at all, as all values are reformatted
19913 to extended precision. */
19914 if (MAYBE_FLOAT_CLASS_P (regclass))
19915 return false;
19917 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
19919 /* Vector registers do not support QI or HImode loads. If we don't
19920 disallow a change to these modes, reload will assume it's ok to
19921 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
19922 the vec_dupv4hi pattern.
19923 NB: SSE2 can load 16bit data to sse register via pinsrw. */
19924 int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
19925 if (GET_MODE_SIZE (from) < mov_size
19926 || GET_MODE_SIZE (to) < mov_size)
19927 return false;
19930 return true;
19933 /* Return index of MODE in the sse load/store tables. */
19935 static inline int
19936 sse_store_index (machine_mode mode)
19938 /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
19939 costs to processor_costs, which requires changes to all entries in
19940 processor cost table. */
19941 if (mode == E_HFmode)
19942 mode = E_SFmode;
19944 switch (GET_MODE_SIZE (mode))
19946 case 4:
19947 return 0;
19948 case 8:
19949 return 1;
19950 case 16:
19951 return 2;
19952 case 32:
19953 return 3;
19954 case 64:
19955 return 4;
19956 default:
19957 return -1;
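/* Illustrative mapping: SFmode (and HFmode, costed as SFmode) -> 0,
   DFmode -> 1, 16-byte modes such as V4SFmode or TImode -> 2,
   32-byte modes -> 3, 64-byte modes -> 4; anything else (e.g. QImode)
   yields -1 and the callers fall back to a high default cost.  */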
19961 /* Return the cost of moving data of mode M between a
19962 register and memory. A value of 2 is the default; this cost is
19963 relative to those in `REGISTER_MOVE_COST'.
19965 This function is used extensively by register_move_cost that is used to
19966 build tables at startup. Make it inline in this case.
19967 When IN is 2, return maximum of in and out move cost.
19969 If moving between registers and memory is more expensive than
19970 between two registers, you should define this macro to express the
19971 relative cost.
19973 Also model the increased cost of moving QImode registers in non-Q_REGS
19974 classes. */
19976 static inline int
19977 inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
19979 int cost;
19981 if (FLOAT_CLASS_P (regclass))
19983 int index;
19984 switch (mode)
19986 case E_SFmode:
19987 index = 0;
19988 break;
19989 case E_DFmode:
19990 index = 1;
19991 break;
19992 case E_XFmode:
19993 index = 2;
19994 break;
19995 default:
19996 return 100;
19998 if (in == 2)
19999 return MAX (ix86_cost->hard_register.fp_load [index],
20000 ix86_cost->hard_register.fp_store [index]);
20001 return in ? ix86_cost->hard_register.fp_load [index]
20002 : ix86_cost->hard_register.fp_store [index];
20004 if (SSE_CLASS_P (regclass))
20006 int index = sse_store_index (mode);
20007 if (index == -1)
20008 return 100;
20009 if (in == 2)
20010 return MAX (ix86_cost->hard_register.sse_load [index],
20011 ix86_cost->hard_register.sse_store [index]);
20012 return in ? ix86_cost->hard_register.sse_load [index]
20013 : ix86_cost->hard_register.sse_store [index];
20015 if (MASK_CLASS_P (regclass))
20017 int index;
20018 switch (GET_MODE_SIZE (mode))
20020 case 1:
20021 index = 0;
20022 break;
20023 case 2:
20024 index = 1;
20025 break;
20026 /* DImode loads and stores are assumed to cost the same as SImode. */
20027 case 4:
20028 case 8:
20029 index = 2;
20030 break;
20031 default:
20032 return 100;
20035 if (in == 2)
20036 return MAX (ix86_cost->hard_register.mask_load[index],
20037 ix86_cost->hard_register.mask_store[index]);
20038 return in ? ix86_cost->hard_register.mask_load[index]
20039 : ix86_cost->hard_register.mask_store[index];
20041 if (MMX_CLASS_P (regclass))
20043 int index;
20044 switch (GET_MODE_SIZE (mode))
20046 case 4:
20047 index = 0;
20048 break;
20049 case 8:
20050 index = 1;
20051 break;
20052 default:
20053 return 100;
20055 if (in == 2)
20056 return MAX (ix86_cost->hard_register.mmx_load [index],
20057 ix86_cost->hard_register.mmx_store [index]);
20058 return in ? ix86_cost->hard_register.mmx_load [index]
20059 : ix86_cost->hard_register.mmx_store [index];
20061 switch (GET_MODE_SIZE (mode))
20063 case 1:
20064 if (Q_CLASS_P (regclass) || TARGET_64BIT)
20066 if (!in)
20067 return ix86_cost->hard_register.int_store[0];
20068 if (TARGET_PARTIAL_REG_DEPENDENCY
20069 && optimize_function_for_speed_p (cfun))
20070 cost = ix86_cost->hard_register.movzbl_load;
20071 else
20072 cost = ix86_cost->hard_register.int_load[0];
20073 if (in == 2)
20074 return MAX (cost, ix86_cost->hard_register.int_store[0]);
20075 return cost;
20077 else
20079 if (in == 2)
20080 return MAX (ix86_cost->hard_register.movzbl_load,
20081 ix86_cost->hard_register.int_store[0] + 4);
20082 if (in)
20083 return ix86_cost->hard_register.movzbl_load;
20084 else
20085 return ix86_cost->hard_register.int_store[0] + 4;
20087 break;
20088 case 2:
20090 int cost;
20091 if (in == 2)
20092 cost = MAX (ix86_cost->hard_register.int_load[1],
20093 ix86_cost->hard_register.int_store[1]);
20094 else
20095 cost = in ? ix86_cost->hard_register.int_load[1]
20096 : ix86_cost->hard_register.int_store[1];
20098 if (mode == E_HFmode)
20100 /* Prefer SSE over GPR for HFmode. */
20101 int sse_cost;
20102 int index = sse_store_index (mode);
20103 if (in == 2)
20104 sse_cost = MAX (ix86_cost->hard_register.sse_load[index],
20105 ix86_cost->hard_register.sse_store[index]);
20106 else
20107 sse_cost = (in
20108 ? ix86_cost->hard_register.sse_load [index]
20109 : ix86_cost->hard_register.sse_store [index]);
20110 if (sse_cost >= cost)
20111 cost = sse_cost + 1;
20113 return cost;
20115 default:
20116 if (in == 2)
20117 cost = MAX (ix86_cost->hard_register.int_load[2],
20118 ix86_cost->hard_register.int_store[2]);
20119 else if (in)
20120 cost = ix86_cost->hard_register.int_load[2];
20121 else
20122 cost = ix86_cost->hard_register.int_store[2];
20123 /* Multiply by the number of GPR moves needed. */
20124 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
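/* For illustration, with regclass = GENERAL_REGS on x86-64: a DImode
   access costs int_load[2] / int_store[2] (one GPR move), while a TImode
   access hits the default case and is scaled by CEIL (16, 8) = 2; with
   IN == 2 the maximum of the load and store cost is used.  */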
20128 static int
20129 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
20131 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
20135 /* Return the cost of moving data from a register in class CLASS1 to
20136 one in class CLASS2.
20138 It is not required that the cost always equal 2 when FROM is the same as TO;
20139 on some machines it is expensive to move between registers if they are not
20140 general registers. */
20142 static int
20143 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
20144 reg_class_t class2_i)
20146 enum reg_class class1 = (enum reg_class) class1_i;
20147 enum reg_class class2 = (enum reg_class) class2_i;
20149 /* In case we require secondary memory, compute the cost of the store followed
20150 by the load. To avoid bad register allocation choices, this needs
20151 to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
20153 if (inline_secondary_memory_needed (mode, class1, class2, false))
20155 int cost = 1;
20157 cost += inline_memory_move_cost (mode, class1, 2);
20158 cost += inline_memory_move_cost (mode, class2, 2);
20160 /* When copying from a general purpose register we may emit multiple
20161 stores followed by a single load, causing a memory size mismatch stall.
20162 Count this as an arbitrarily high cost of 20. */
20163 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
20164 && TARGET_MEMORY_MISMATCH_STALL
20165 && targetm.class_max_nregs (class1, mode)
20166 > targetm.class_max_nregs (class2, mode))
20167 cost += 20;
20169 /* In the case of FP/MMX moves, the registers actually overlap, and we
20170 have to switch modes in order to treat them differently. */
20171 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
20172 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
20173 cost += 20;
20175 return cost;
20178 /* Moves between MMX and non-MMX units require secondary memory. */
20179 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20180 gcc_unreachable ();
20182 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20183 return (SSE_CLASS_P (class1)
20184 ? ix86_cost->hard_register.sse_to_integer
20185 : ix86_cost->hard_register.integer_to_sse);
20187 /* Moves between mask register and GPR. */
20188 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
20190 return (MASK_CLASS_P (class1)
20191 ? ix86_cost->hard_register.mask_to_integer
20192 : ix86_cost->hard_register.integer_to_mask);
20194 /* Moving between mask registers. */
20195 if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
20196 return ix86_cost->hard_register.mask_move;
20198 if (MAYBE_FLOAT_CLASS_P (class1))
20199 return ix86_cost->hard_register.fp_move;
20200 if (MAYBE_SSE_CLASS_P (class1))
20202 if (GET_MODE_BITSIZE (mode) <= 128)
20203 return ix86_cost->hard_register.xmm_move;
20204 if (GET_MODE_BITSIZE (mode) <= 256)
20205 return ix86_cost->hard_register.ymm_move;
20206 return ix86_cost->hard_register.zmm_move;
20208 if (MAYBE_MMX_CLASS_P (class1))
20209 return ix86_cost->hard_register.mmx_move;
20210 return 2;
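/* For illustration: an SImode move between SSE_REGS and GENERAL_REGS is
   charged sse_to_integer or integer_to_sse depending on the direction;
   mask <-> GPR moves use mask_to_integer / integer_to_mask; and a DFmode
   move between FLOAT_REGS and GENERAL_REGS needs secondary memory, so its
   cost is 1 plus the in/out memory move cost of both classes.  */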
20213 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
20214 words of a value of mode MODE but can be less for certain modes in
20215 special long registers.
20217 Actually there are no two word move instructions for consecutive
20218 registers. And only registers 0-3 may have mov byte instructions
20219 applied to them. */
20221 static unsigned int
20222 ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
20224 if (GENERAL_REGNO_P (regno))
20226 if (mode == XFmode)
20227 return TARGET_64BIT ? 2 : 3;
20228 if (mode == XCmode)
20229 return TARGET_64BIT ? 4 : 6;
20230 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
20232 if (COMPLEX_MODE_P (mode))
20233 return 2;
20234 /* Register pair for mask registers. */
20235 if (mode == P2QImode || mode == P2HImode)
20236 return 2;
20237 if (mode == V64SFmode || mode == V64SImode)
20238 return 4;
20239 return 1;
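/* Illustrative values: a DImode value in a GPR takes 2 registers on ia32
   and 1 on x86-64, XFmode takes 3 resp. 2; in SSE/mask/MMX registers a
   value takes a single register, except complex modes (2), the mask
   register pairs P2QImode/P2HImode (2) and V64SFmode/V64SImode (4).  */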
20242 /* Implement REGMODE_NATURAL_SIZE(MODE). */
20243 unsigned int
20244 ix86_regmode_natural_size (machine_mode mode)
20246 if (mode == P2HImode || mode == P2QImode)
20247 return GET_MODE_SIZE (mode) / 2;
20248 return UNITS_PER_WORD;
20251 /* Implement TARGET_HARD_REGNO_MODE_OK. */
20253 static bool
20254 ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
20256 /* The flags register can hold only CCmode values, and only the flags register can hold them. */
20257 if (CC_REGNO_P (regno))
20258 return GET_MODE_CLASS (mode) == MODE_CC;
20259 if (GET_MODE_CLASS (mode) == MODE_CC
20260 || GET_MODE_CLASS (mode) == MODE_RANDOM)
20261 return false;
20262 if (STACK_REGNO_P (regno))
20263 return VALID_FP_MODE_P (mode);
20264 if (MASK_REGNO_P (regno))
20266 /* A register pair can only start at an even register number. */
20267 if ((mode == P2QImode || mode == P2HImode))
20268 return MASK_PAIR_REGNO_P(regno);
20270 return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
20271 || (TARGET_AVX512BW
20272 && VALID_MASK_AVX512BW_MODE (mode)));
20275 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
20276 return false;
20278 if (SSE_REGNO_P (regno))
20280 /* We implement the move patterns for all vector modes into and
20281 out of SSE registers, even when no operation instructions
20282 are available. */
20284 /* For AVX-512 we allow, regardless of regno:
20285 - XI mode
20286 - any of 512-bit wide vector mode
20287 - any scalar mode. */
20288 if (TARGET_AVX512F
20289 && (VALID_AVX512F_REG_OR_XI_MODE (mode)
20290 || VALID_AVX512F_SCALAR_MODE (mode)))
20291 return true;
20293 /* For AVX-5124FMAPS or AVX-5124VNNIW
20294 allow V64SF and V64SI modes for special regnos. */
20295 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
20296 && (mode == V64SFmode || mode == V64SImode)
20297 && MOD4_SSE_REGNO_P (regno))
20298 return true;
20300 /* TODO check for QI/HI scalars. */
20301 /* AVX512VL allows SSE registers 16+ (xmm16-xmm31) for 128/256-bit modes. */
20302 if (TARGET_AVX512VL
20303 && (VALID_AVX256_REG_OR_OI_MODE (mode)
20304 || VALID_AVX512VL_128_REG_MODE (mode)))
20305 return true;
20307 /* xmm16-xmm31 are only available for AVX-512. */
20308 if (EXT_REX_SSE_REGNO_P (regno))
20309 return false;
20311 /* Use pinsrw/pextrw to move 16-bit data between SSE and integer registers. */
20312 if (TARGET_SSE2 && mode == HImode)
20313 return true;
20315 /* OImode and AVX modes are available only when AVX is enabled. */
20316 return ((TARGET_AVX
20317 && VALID_AVX256_REG_OR_OI_MODE (mode))
20318 || VALID_SSE_REG_MODE (mode)
20319 || VALID_SSE2_REG_MODE (mode)
20320 || VALID_MMX_REG_MODE (mode)
20321 || VALID_MMX_REG_MODE_3DNOW (mode));
20323 if (MMX_REGNO_P (regno))
20325 /* We implement the move patterns for 3DNOW modes even in MMX mode,
20326 so if the register is available at all, then we can move data of
20327 the given mode into or out of it. */
20328 return (VALID_MMX_REG_MODE (mode)
20329 || VALID_MMX_REG_MODE_3DNOW (mode));
20332 if (mode == QImode)
20334 /* Take care for QImode values - they can be in non-QI regs,
20335 but then they do cause partial register stalls. */
20336 if (ANY_QI_REGNO_P (regno))
20337 return true;
20338 if (!TARGET_PARTIAL_REG_STALL)
20339 return true;
20340 /* LRA checks if the hard register is OK for the given mode.
20341 QImode values can live in non-QI regs, so we allow all
20342 registers here. */
20343 if (lra_in_progress)
20344 return true;
20345 return !can_create_pseudo_p ();
20347 /* We handle both integer and floats in the general purpose registers. */
20348 else if (VALID_INT_MODE_P (mode)
20349 || VALID_FP_MODE_P (mode))
20350 return true;
20351 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
20352 on to use that value in smaller contexts, this can easily force a
20353 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
20354 supporting DImode, allow it. */
20355 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
20356 return true;
20358 return false;
20361 /* Implement TARGET_INSN_CALLEE_ABI. */
20363 const predefined_function_abi &
20364 ix86_insn_callee_abi (const rtx_insn *insn)
20366 unsigned int abi_id = 0;
20367 rtx pat = PATTERN (insn);
20368 if (vzeroupper_pattern (pat, VOIDmode))
20369 abi_id = ABI_VZEROUPPER;
20371 return function_abis[abi_id];
20374 /* Initialize function_abis with the corresponding abi_id;
20375 currently only vzeroupper is handled. */
20376 void
20377 ix86_initialize_callee_abi (unsigned int abi_id)
20379 gcc_assert (abi_id == ABI_VZEROUPPER);
20380 predefined_function_abi &vzeroupper_abi = function_abis[abi_id];
20381 if (!vzeroupper_abi.initialized_p ())
20383 HARD_REG_SET full_reg_clobbers;
20384 CLEAR_HARD_REG_SET (full_reg_clobbers);
20385 vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers);
20389 void
20390 ix86_expand_avx_vzeroupper (void)
20392 /* Initialize vzeroupper_abi here. */
20393 ix86_initialize_callee_abi (ABI_VZEROUPPER);
20394 rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ());
20395 /* Return false for non-local goto in can_nonlocal_goto. */
20396 make_reg_eh_region_note (insn, 0, INT_MIN);
20397 /* The `used' flag on the call_insn indicates it's a fake call. */
20398 RTX_FLAG (insn, used) = 1;
20402 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
20403 saves SSE registers across calls is Win64 (thus no need to check the
20404 current ABI here), and with AVX enabled Win64 only guarantees that
20405 the low 16 bytes are saved. */
20407 static bool
20408 ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno,
20409 machine_mode mode)
20411 /* Special ABI for vzeroupper, which only clobbers the upper part of the SSE regs. */
20412 if (abi_id == ABI_VZEROUPPER)
20413 return (GET_MODE_SIZE (mode) > 16
20414 && ((TARGET_64BIT && REX_SSE_REGNO_P (regno))
20415 || LEGACY_SSE_REGNO_P (regno)));
20417 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
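/* For illustration: across a vzeroupper "call" a 32-byte V8SFmode value
   in xmm0-xmm15 is partially clobbered (only the low 128 bits survive),
   while a 16-byte V4SFmode value is preserved; for real calls only the
   Win64 case matters, where any SSE register value wider than 16 bytes
   is considered partially clobbered.  */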
20420 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
20421 tieable integer mode. */
20423 static bool
20424 ix86_tieable_integer_mode_p (machine_mode mode)
20426 switch (mode)
20428 case E_HImode:
20429 case E_SImode:
20430 return true;
20432 case E_QImode:
20433 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
20435 case E_DImode:
20436 return TARGET_64BIT;
20438 default:
20439 return false;
20443 /* Implement TARGET_MODES_TIEABLE_P.
20445 Return true if MODE1 is accessible in a register that can hold MODE2
20446 without copying. That is, all register classes that can hold MODE2
20447 can also hold MODE1. */
20449 static bool
20450 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
20452 if (mode1 == mode2)
20453 return true;
20455 if (ix86_tieable_integer_mode_p (mode1)
20456 && ix86_tieable_integer_mode_p (mode2))
20457 return true;
20459 /* MODE2 being XFmode implies fp stack or general regs, which means we
20460 can tie any smaller floating point modes to it. Note that we do not
20461 tie this with TFmode. */
20462 if (mode2 == XFmode)
20463 return mode1 == SFmode || mode1 == DFmode;
20465 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
20466 that we can tie it with SFmode. */
20467 if (mode2 == DFmode)
20468 return mode1 == SFmode;
20470 /* If MODE2 is only appropriate for an SSE register, then tie with
20471 any other mode acceptable to SSE registers. */
20472 if (GET_MODE_SIZE (mode2) == 64
20473 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
20474 return (GET_MODE_SIZE (mode1) == 64
20475 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
20476 if (GET_MODE_SIZE (mode2) == 32
20477 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
20478 return (GET_MODE_SIZE (mode1) == 32
20479 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
20480 if (GET_MODE_SIZE (mode2) == 16
20481 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
20482 return (GET_MODE_SIZE (mode1) == 16
20483 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
20485 /* If MODE2 is appropriate for an MMX register, then tie
20486 with any other mode acceptable to MMX registers. */
20487 if (GET_MODE_SIZE (mode2) == 8
20488 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
20489 return (GET_MODE_SIZE (mode1) == 8
20490 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
20492 /* SCmode and DImode can be tied. */
20493 if ((mode1 == E_SCmode && mode2 == E_DImode)
20494 || (mode1 == E_DImode && mode2 == E_SCmode))
20495 return TARGET_64BIT;
20497 /* [SD]Cmode and V2[SD]Fmode modes can be tied. */
20498 if ((mode1 == E_SCmode && mode2 == E_V2SFmode)
20499 || (mode1 == E_V2SFmode && mode2 == E_SCmode)
20500 || (mode1 == E_DCmode && mode2 == E_V2DFmode)
20501 || (mode1 == E_V2DFmode && mode2 == E_DCmode))
20502 return true;
20504 return false;
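/* A few illustrative pairs: SImode/HImode tie (both are tieable integer
   modes), SFmode ties with DFmode and with XFmode, two 16-byte vector
   modes such as V4SFmode and V2DFmode tie, SCmode/DImode tie only on
   x86-64, and SCmode also ties with V2SFmode.  */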
20507 /* Return the cost of moving between two registers of mode MODE. */
20509 static int
20510 ix86_set_reg_reg_cost (machine_mode mode)
20512 unsigned int units = UNITS_PER_WORD;
20514 switch (GET_MODE_CLASS (mode))
20516 default:
20517 break;
20519 case MODE_CC:
20520 units = GET_MODE_SIZE (CCmode);
20521 break;
20523 case MODE_FLOAT:
20524 if ((TARGET_SSE && mode == TFmode)
20525 || (TARGET_80387 && mode == XFmode)
20526 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
20527 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
20528 units = GET_MODE_SIZE (mode);
20529 break;
20531 case MODE_COMPLEX_FLOAT:
20532 if ((TARGET_SSE && mode == TCmode)
20533 || (TARGET_80387 && mode == XCmode)
20534 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
20535 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
20536 units = GET_MODE_SIZE (mode);
20537 break;
20539 case MODE_VECTOR_INT:
20540 case MODE_VECTOR_FLOAT:
20541 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
20542 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
20543 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20544 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20545 || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
20546 && VALID_MMX_REG_MODE (mode)))
20547 units = GET_MODE_SIZE (mode);
20550 /* Return the cost of moving between two registers of mode MODE,
20551 assuming that the move will be in pieces of at most UNITS bytes. */
20552 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
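/* For illustration on x86-64: an SImode or DImode register copy costs
   COSTS_N_INSNS (1); a TImode copy in GPRs costs COSTS_N_INSNS (2) since
   UNITS remains UNITS_PER_WORD; a V8SFmode copy with AVX costs
   COSTS_N_INSNS (1) because UNITS is widened to the full 32-byte mode
   size.  */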
20555 /* Return cost of vector operation in MODE given that scalar version has
20556 COST. */
20558 static int
20559 ix86_vec_cost (machine_mode mode, int cost)
20561 if (!VECTOR_MODE_P (mode))
20562 return cost;
20564 if (GET_MODE_BITSIZE (mode) == 128
20565 && TARGET_SSE_SPLIT_REGS)
20566 return cost * GET_MODE_BITSIZE (mode) / 64;
20567 else if (GET_MODE_BITSIZE (mode) > 128
20568 && TARGET_AVX256_SPLIT_REGS)
20569 return cost * GET_MODE_BITSIZE (mode) / 128;
20570 else if (GET_MODE_BITSIZE (mode) > 256
20571 && TARGET_AVX512_SPLIT_REGS)
20572 return cost * GET_MODE_BITSIZE (mode) / 256;
20573 return cost;
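/* For illustration: on a target with TARGET_AVX256_SPLIT_REGS a 256-bit
   operation is costed as two 128-bit halves (cost * 256 / 128), and with
   TARGET_SSE_SPLIT_REGS a 128-bit operation as two 64-bit halves; scalar
   modes are returned unchanged.  */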
20576 /* Return the cost of vec_widen_<s>mult_hi/lo_<mode>;
20577 vec_widen_<s>mult_hi/lo_<mode> is only available for VI124_AVX2. */
20578 static int
20579 ix86_widen_mult_cost (const struct processor_costs *cost,
20580 enum machine_mode mode, bool uns_p)
20582 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
20583 int extra_cost = 0;
20584 int basic_cost = 0;
20585 switch (mode)
20587 case V8HImode:
20588 case V16HImode:
20589 if (!uns_p || mode == V16HImode)
20590 extra_cost = cost->sse_op * 2;
20591 basic_cost = cost->mulss * 2 + cost->sse_op * 4;
20592 break;
20593 case V4SImode:
20594 case V8SImode:
20595 /* pmulhw/pmullw can be used. */
20596 basic_cost = cost->mulss * 2 + cost->sse_op * 2;
20597 break;
20598 case V2DImode:
20599 /* pmuludq under SSE2, pmuldq under SSE4.1; sign_extend
20600 requires an extra 4 mul, 4 add, 4 cmp and 2 shift insns. */
20601 if (!TARGET_SSE4_1 && !uns_p)
20602 extra_cost = (cost->mulss + cost->addss + cost->sse_op) * 4
20603 + cost->sse_op * 2;
20604 /* Fallthru. */
20605 case V4DImode:
20606 basic_cost = cost->mulss * 2 + cost->sse_op * 4;
20607 break;
20608 default:
20609 /* Not implemented. */
20610 return 100;
20612 return ix86_vec_cost (mode, basic_cost + extra_cost);
20615 /* Return cost of multiplication in MODE. */
20617 static int
20618 ix86_multiplication_cost (const struct processor_costs *cost,
20619 enum machine_mode mode)
20621 machine_mode inner_mode = mode;
20622 if (VECTOR_MODE_P (mode))
20623 inner_mode = GET_MODE_INNER (mode);
20625 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
20626 return inner_mode == DFmode ? cost->mulsd : cost->mulss;
20627 else if (X87_FLOAT_MODE_P (mode))
20628 return cost->fmul;
20629 else if (FLOAT_MODE_P (mode))
20630 return ix86_vec_cost (mode,
20631 inner_mode == DFmode ? cost->mulsd : cost->mulss);
20632 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
20634 int nmults, nops;
20635 /* Cost of reading the memory. */
20636 int extra;
20638 switch (mode)
20640 case V4QImode:
20641 case V8QImode:
20642 /* Partial V*QImode is emulated with 4-6 insns. */
20643 nmults = 1;
20644 nops = 3;
20645 extra = 0;
20647 if (TARGET_AVX512BW && TARGET_AVX512VL)
20649 else if (TARGET_AVX2)
20650 nops += 2;
20651 else if (TARGET_XOP)
20652 extra += cost->sse_load[2];
20653 else
20655 nops += 1;
20656 extra += cost->sse_load[2];
20658 goto do_qimode;
20660 case V16QImode:
20661 /* V*QImode is emulated with 4-11 insns. */
20662 nmults = 1;
20663 nops = 3;
20664 extra = 0;
20666 if (TARGET_AVX2 && !TARGET_PREFER_AVX128)
20668 if (!(TARGET_AVX512BW && TARGET_AVX512VL))
20669 nops += 3;
20671 else if (TARGET_XOP)
20673 nmults += 1;
20674 nops += 2;
20675 extra += cost->sse_load[2];
20677 else
20679 nmults += 1;
20680 nops += 4;
20681 extra += cost->sse_load[2];
20683 goto do_qimode;
20685 case V32QImode:
20686 nmults = 1;
20687 nops = 3;
20688 extra = 0;
20690 if (!TARGET_AVX512BW || TARGET_PREFER_AVX256)
20692 nmults += 1;
20693 nops += 4;
20694 extra += cost->sse_load[3] * 2;
20696 goto do_qimode;
20698 case V64QImode:
20699 nmults = 2;
20700 nops = 9;
20701 extra = cost->sse_load[3] * 2 + cost->sse_load[4] * 2;
20703 do_qimode:
20704 return ix86_vec_cost (mode, cost->mulss * nmults
20705 + cost->sse_op * nops) + extra;
20707 case V4SImode:
20708 /* pmulld is used in this case. No emulation is needed. */
20709 if (TARGET_SSE4_1)
20710 goto do_native;
20711 /* V4SImode is emulated with 7 insns. */
20712 else
20713 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
20715 case V2DImode:
20716 case V4DImode:
20717 /* vpmullq is used in this case. No emulation is needed. */
20718 if (TARGET_AVX512DQ && TARGET_AVX512VL)
20719 goto do_native;
20720 /* V*DImode is emulated with 6-8 insns. */
20721 else if (TARGET_XOP && mode == V2DImode)
20722 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 4);
20723 /* FALLTHRU */
20724 case V8DImode:
20725 /* vpmullq is used in this case. No emulation is needed. */
20726 if (TARGET_AVX512DQ && mode == V8DImode)
20727 goto do_native;
20728 else
20729 return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
20731 default:
20732 do_native:
20733 return ix86_vec_cost (mode, cost->mulss);
20736 else
20737 return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
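/* For illustration: a scalar SImode multiply is costed as
   mult_init[2] + 7 * mult_bit, a DFmode multiply with SSE math as mulsd,
   V4SImode as a single pmulld when SSE4.1 is available and as an
   emulation of roughly 2 multiplies plus 5 logic/shuffle ops otherwise.  */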
20740 /* Return cost of division in MODE. */
20742 static int
20743 ix86_division_cost (const struct processor_costs *cost,
20744 enum machine_mode mode)
20746 machine_mode inner_mode = mode;
20747 if (VECTOR_MODE_P (mode))
20748 inner_mode = GET_MODE_INNER (mode);
20750 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
20751 return inner_mode == DFmode ? cost->divsd : cost->divss;
20752 else if (X87_FLOAT_MODE_P (mode))
20753 return cost->fdiv;
20754 else if (FLOAT_MODE_P (mode))
20755 return ix86_vec_cost (mode,
20756 inner_mode == DFmode ? cost->divsd : cost->divss);
20757 else
20758 return cost->divide[MODE_INDEX (mode)];
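/* For illustration: SFmode division with SSE math is costed as divss,
   DFmode as divsd, x87 division as fdiv, and integer division by the
   per-mode divide[] entry (e.g. divide[2] for SImode, divide[3] for
   DImode, per MODE_INDEX).  */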
20761 /* Return cost of shift in MODE.
20762 If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
20763 AND_IN_OP1 specifies whether op1 is the result of an AND, and
20764 SHIFT_AND_TRUNCATE whether op1 is a subreg of an AND.
20766 SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */
20768 static int
20769 ix86_shift_rotate_cost (const struct processor_costs *cost,
20770 enum rtx_code code,
20771 enum machine_mode mode, bool constant_op1,
20772 HOST_WIDE_INT op1_val,
20773 bool and_in_op1,
20774 bool shift_and_truncate,
20775 bool *skip_op0, bool *skip_op1)
20777 if (skip_op0)
20778 *skip_op0 = *skip_op1 = false;
20780 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
20782 int count;
20783 /* Cost of reading the memory. */
20784 int extra;
20786 switch (mode)
20788 case V4QImode:
20789 case V8QImode:
20790 if (TARGET_AVX2)
20791 /* Use vpbroadcast. */
20792 extra = cost->sse_op;
20793 else
20794 extra = cost->sse_load[2];
20796 if (constant_op1)
20798 if (code == ASHIFTRT)
20800 count = 4;
20801 extra *= 2;
20803 else
20804 count = 2;
20806 else if (TARGET_AVX512BW && TARGET_AVX512VL)
20807 return ix86_vec_cost (mode, cost->sse_op * 4);
20808 else if (TARGET_SSE4_1)
20809 count = 5;
20810 else if (code == ASHIFTRT)
20811 count = 6;
20812 else
20813 count = 5;
20814 return ix86_vec_cost (mode, cost->sse_op * count) + extra;
20816 case V16QImode:
20817 if (TARGET_XOP)
20819 /* For XOP we use vpshab, which requires a broadcast of the
20820 value to the variable shift insn. For constants this
20821 means a V16Q const in mem; even when we can perform the
20822 shift with one insn set the cost to prefer paddb. */
20823 if (constant_op1)
20825 extra = cost->sse_load[2];
20826 return ix86_vec_cost (mode, cost->sse_op) + extra;
20828 else
20830 count = (code == ASHIFT) ? 3 : 4;
20831 return ix86_vec_cost (mode, cost->sse_op * count);
20834 /* FALLTHRU */
20835 case V32QImode:
20836 if (TARGET_AVX2)
20837 /* Use vpbroadcast. */
20838 extra = cost->sse_op;
20839 else
20840 extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3];
20842 if (constant_op1)
20844 if (code == ASHIFTRT)
20846 count = 4;
20847 extra *= 2;
20849 else
20850 count = 2;
20852 else if (TARGET_AVX512BW
20853 && ((mode == V32QImode && !TARGET_PREFER_AVX256)
20854 || (mode == V16QImode && TARGET_AVX512VL
20855 && !TARGET_PREFER_AVX128)))
20856 return ix86_vec_cost (mode, cost->sse_op * 4);
20857 else if (TARGET_AVX2
20858 && mode == V16QImode && !TARGET_PREFER_AVX128)
20859 count = 6;
20860 else if (TARGET_SSE4_1)
20861 count = 9;
20862 else if (code == ASHIFTRT)
20863 count = 10;
20864 else
20865 count = 9;
20866 return ix86_vec_cost (mode, cost->sse_op * count) + extra;
20868 case V2DImode:
20869 case V4DImode:
20870 /* V*DImode arithmetic right shift is emulated. */
20871 if (code == ASHIFTRT && !TARGET_AVX512VL)
20873 if (constant_op1)
20875 if (op1_val == 63)
20876 count = TARGET_SSE4_2 ? 1 : 2;
20877 else if (TARGET_XOP)
20878 count = 2;
20879 else if (TARGET_SSE4_1)
20880 count = 3;
20881 else
20882 count = 4;
20884 else if (TARGET_XOP)
20885 count = 3;
20886 else if (TARGET_SSE4_2)
20887 count = 4;
20888 else
20889 count = 5;
20891 return ix86_vec_cost (mode, cost->sse_op * count);
20893 /* FALLTHRU */
20894 default:
20895 return ix86_vec_cost (mode, cost->sse_op);
20899 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20901 if (constant_op1)
20903 if (op1_val > 32)
20904 return cost->shift_const + COSTS_N_INSNS (2);
20905 else
20906 return cost->shift_const * 2;
20908 else
20910 if (and_in_op1)
20911 return cost->shift_var * 2;
20912 else
20913 return cost->shift_var * 6 + COSTS_N_INSNS (2);
20916 else
20918 if (constant_op1)
20919 return cost->shift_const;
20920 else if (shift_and_truncate)
20922 if (skip_op0)
20923 *skip_op0 = *skip_op1 = true;
20924 /* Return the cost after shift-and-truncate. */
20925 return cost->shift_var;
20927 else
20928 return cost->shift_var;
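/* Worked scalar examples: a constant shift of an SImode value costs
   shift_const; a double-word DImode shift on ia32 costs shift_const * 2
   for constant counts up to 32 and shift_const + COSTS_N_INSNS (2) for
   larger counts, while a variable double-word shift without a masking
   AND costs shift_var * 6 + COSTS_N_INSNS (2).  */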
20932 /* Compute a (partial) cost for rtx X. Return true if the complete
20933 cost has been computed, and false if subexpressions should be
20934 scanned. In either case, *TOTAL contains the cost result. */
20936 static bool
20937 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
20938 int *total, bool speed)
20940 rtx mask;
20941 enum rtx_code code = GET_CODE (x);
20942 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
20943 const struct processor_costs *cost
20944 = speed ? ix86_tune_cost : &ix86_size_cost;
20945 int src_cost;
20947 switch (code)
20949 case SET:
20950 if (register_operand (SET_DEST (x), VOIDmode)
20951 && register_operand (SET_SRC (x), VOIDmode))
20953 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
20954 return true;
20957 if (register_operand (SET_SRC (x), VOIDmode))
20958 /* Avoid potentially incorrect high cost from rtx_costs
20959 for non-tieable SUBREGs. */
20960 src_cost = 0;
20961 else
20963 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
20965 if (CONSTANT_P (SET_SRC (x)))
20966 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
20967 a small value, possibly zero for cheap constants. */
20968 src_cost += COSTS_N_INSNS (1);
20971 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
20972 return true;
20974 case CONST_INT:
20975 case CONST:
20976 case LABEL_REF:
20977 case SYMBOL_REF:
20978 if (x86_64_immediate_operand (x, VOIDmode))
20979 *total = 0;
20980 else
20981 *total = 1;
20982 return true;
20984 case CONST_DOUBLE:
20985 if (IS_STACK_MODE (mode))
20986 switch (standard_80387_constant_p (x))
20988 case -1:
20989 case 0:
20990 break;
20991 case 1: /* 0.0 */
20992 *total = 1;
20993 return true;
20994 default: /* Other constants */
20995 *total = 2;
20996 return true;
20998 /* FALLTHRU */
21000 case CONST_VECTOR:
21001 switch (standard_sse_constant_p (x, mode))
21003 case 0:
21004 break;
21005 case 1: /* 0: xor eliminates false dependency */
21006 *total = 0;
21007 return true;
21008 default: /* -1: cmp contains false dependency */
21009 *total = 1;
21010 return true;
21012 /* FALLTHRU */
21014 case CONST_WIDE_INT:
21015 /* Fall back to (MEM (SYMBOL_REF)), since that's where
21016 it'll probably end up. Add a penalty for size. */
21017 *total = (COSTS_N_INSNS (1)
21018 + (!TARGET_64BIT && flag_pic)
21019 + (GET_MODE_SIZE (mode) <= 4
21020 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
21021 return true;
21023 case ZERO_EXTEND:
21024 /* Zero extension is often completely free on x86_64, so make
21025 it as cheap as possible. */
21026 if (TARGET_64BIT && mode == DImode
21027 && GET_MODE (XEXP (x, 0)) == SImode)
21028 *total = 1;
21029 else if (TARGET_ZERO_EXTEND_WITH_AND)
21030 *total = cost->add;
21031 else
21032 *total = cost->movzx;
21033 return false;
21035 case SIGN_EXTEND:
21036 *total = cost->movsx;
21037 return false;
21039 case ASHIFT:
21040 if (SCALAR_INT_MODE_P (mode)
21041 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
21042 && CONST_INT_P (XEXP (x, 1)))
21044 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
21045 if (value == 1)
21047 *total = cost->add;
21048 return false;
21050 if ((value == 2 || value == 3)
21051 && cost->lea <= cost->shift_const)
21053 *total = cost->lea;
21054 return false;
21057 /* FALLTHRU */
21059 case ROTATE:
21060 case ASHIFTRT:
21061 case LSHIFTRT:
21062 case ROTATERT:
21063 bool skip_op0, skip_op1;
21064 *total = ix86_shift_rotate_cost (cost, code, mode,
21065 CONSTANT_P (XEXP (x, 1)),
21066 CONST_INT_P (XEXP (x, 1))
21067 ? INTVAL (XEXP (x, 1)) : -1,
21068 GET_CODE (XEXP (x, 1)) == AND,
21069 SUBREG_P (XEXP (x, 1))
21070 && GET_CODE (XEXP (XEXP (x, 1),
21071 0)) == AND,
21072 &skip_op0, &skip_op1);
21073 if (skip_op0 || skip_op1)
21075 if (!skip_op0)
21076 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
21077 if (!skip_op1)
21078 *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
21079 return true;
21081 return false;
21083 case FMA:
21085 rtx sub;
21087 gcc_assert (FLOAT_MODE_P (mode));
21088 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
21090 *total = ix86_vec_cost (mode,
21091 GET_MODE_INNER (mode) == SFmode
21092 ? cost->fmass : cost->fmasd);
21093 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
21095 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
21096 sub = XEXP (x, 0);
21097 if (GET_CODE (sub) == NEG)
21098 sub = XEXP (sub, 0);
21099 *total += rtx_cost (sub, mode, FMA, 0, speed);
21101 sub = XEXP (x, 2);
21102 if (GET_CODE (sub) == NEG)
21103 sub = XEXP (sub, 0);
21104 *total += rtx_cost (sub, mode, FMA, 2, speed);
21105 return true;
21108 case MULT:
21109 if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
21111 rtx op0 = XEXP (x, 0);
21112 rtx op1 = XEXP (x, 1);
21113 int nbits;
21114 if (CONST_INT_P (XEXP (x, 1)))
21116 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
21117 for (nbits = 0; value != 0; value &= value - 1)
21118 nbits++;
21120 else
21121 /* This is arbitrary. */
21122 nbits = 7;
21124 /* Compute costs correctly for widening multiplication. */
21125 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
21126 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
21127 == GET_MODE_SIZE (mode))
21129 int is_mulwiden = 0;
21130 machine_mode inner_mode = GET_MODE (op0);
21132 if (GET_CODE (op0) == GET_CODE (op1))
21133 is_mulwiden = 1, op1 = XEXP (op1, 0);
21134 else if (CONST_INT_P (op1))
21136 if (GET_CODE (op0) == SIGN_EXTEND)
21137 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
21138 == INTVAL (op1);
21139 else
21140 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
21143 if (is_mulwiden)
21144 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
21147 int mult_init;
21148 // Double word multiplication requires 3 mults and 2 adds.
21149 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21151 mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)]
21152 + 2 * cost->add;
21153 nbits *= 3;
21155 else mult_init = cost->mult_init[MODE_INDEX (mode)];
21157 *total = (mult_init
21158 + nbits * cost->mult_bit
21159 + rtx_cost (op0, mode, outer_code, opno, speed)
21160 + rtx_cost (op1, mode, outer_code, opno, speed));
21162 return true;
21164 *total = ix86_multiplication_cost (cost, mode);
21165 return false;
21167 case DIV:
21168 case UDIV:
21169 case MOD:
21170 case UMOD:
21171 *total = ix86_division_cost (cost, mode);
21172 return false;
21174 case PLUS:
21175 if (GET_MODE_CLASS (mode) == MODE_INT
21176 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
21178 if (GET_CODE (XEXP (x, 0)) == PLUS
21179 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
21180 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
21181 && CONSTANT_P (XEXP (x, 1)))
21183 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
21184 if (val == 2 || val == 4 || val == 8)
21186 *total = cost->lea;
21187 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
21188 outer_code, opno, speed);
21189 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
21190 outer_code, opno, speed);
21191 *total += rtx_cost (XEXP (x, 1), mode,
21192 outer_code, opno, speed);
21193 return true;
21196 else if (GET_CODE (XEXP (x, 0)) == MULT
21197 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
21199 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
21200 if (val == 2 || val == 4 || val == 8)
21202 *total = cost->lea;
21203 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21204 outer_code, opno, speed);
21205 *total += rtx_cost (XEXP (x, 1), mode,
21206 outer_code, opno, speed);
21207 return true;
21210 else if (GET_CODE (XEXP (x, 0)) == PLUS)
21212 rtx op = XEXP (XEXP (x, 0), 0);
21214 /* Add with carry, ignore the cost of adding a carry flag. */
21215 if (ix86_carry_flag_operator (op, mode)
21216 || ix86_carry_flag_unset_operator (op, mode))
21217 *total = cost->add;
21218 else
21220 *total = cost->lea;
21221 *total += rtx_cost (op, mode,
21222 outer_code, opno, speed);
21225 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
21226 outer_code, opno, speed);
21227 *total += rtx_cost (XEXP (x, 1), mode,
21228 outer_code, opno, speed);
21229 return true;
21232 /* FALLTHRU */
21234 case MINUS:
21235 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
21236 if (GET_MODE_CLASS (mode) == MODE_INT
21237 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
21238 && GET_CODE (XEXP (x, 0)) == MINUS
21239 && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)
21240 || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode)))
21242 *total = cost->add;
21243 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21244 outer_code, opno, speed);
21245 *total += rtx_cost (XEXP (x, 1), mode,
21246 outer_code, opno, speed);
21247 return true;
21250 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21251 *total = cost->addss;
21252 else if (X87_FLOAT_MODE_P (mode))
21253 *total = cost->fadd;
21254 else if (FLOAT_MODE_P (mode))
21255 *total = ix86_vec_cost (mode, cost->addss);
21256 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21257 *total = ix86_vec_cost (mode, cost->sse_op);
21258 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21259 *total = cost->add * 2;
21260 else
21261 *total = cost->add;
21262 return false;
21264 case IOR:
21265 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
21266 || SSE_FLOAT_MODE_P (mode))
21268 /* (ior (not ...) ...) can be a single insn in AVX512. */
21269 if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
21270 && (GET_MODE_SIZE (mode) == 64
21271 || (TARGET_AVX512VL
21272 && (GET_MODE_SIZE (mode) == 32
21273 || GET_MODE_SIZE (mode) == 16))))
21275 rtx right = GET_CODE (XEXP (x, 1)) != NOT
21276 ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0);
21278 *total = ix86_vec_cost (mode, cost->sse_op)
21279 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21280 outer_code, opno, speed)
21281 + rtx_cost (right, mode, outer_code, opno, speed);
21282 return true;
21284 *total = ix86_vec_cost (mode, cost->sse_op);
21286 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21287 *total = cost->add * 2;
21288 else
21289 *total = cost->add;
21290 return false;
21292 case XOR:
21293 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
21294 || SSE_FLOAT_MODE_P (mode))
21295 *total = ix86_vec_cost (mode, cost->sse_op);
21296 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21297 *total = cost->add * 2;
21298 else
21299 *total = cost->add;
21300 return false;
21302 case AND:
21303 if (address_no_seg_operand (x, mode))
21305 *total = cost->lea;
21306 return true;
21308 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
21309 || SSE_FLOAT_MODE_P (mode))
21311 /* pandn is a single instruction. */
21312 if (GET_CODE (XEXP (x, 0)) == NOT)
21314 rtx right = XEXP (x, 1);
21316 /* (and (not ...) (not ...)) can be a single insn in AVX512. */
21317 if (GET_CODE (right) == NOT && TARGET_AVX512F
21318 && (GET_MODE_SIZE (mode) == 64
21319 || (TARGET_AVX512VL
21320 && (GET_MODE_SIZE (mode) == 32
21321 || GET_MODE_SIZE (mode) == 16))))
21322 right = XEXP (right, 0);
21324 *total = ix86_vec_cost (mode, cost->sse_op)
21325 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21326 outer_code, opno, speed)
21327 + rtx_cost (right, mode, outer_code, opno, speed);
21328 return true;
21330 else if (GET_CODE (XEXP (x, 1)) == NOT)
21332 *total = ix86_vec_cost (mode, cost->sse_op)
21333 + rtx_cost (XEXP (x, 0), mode,
21334 outer_code, opno, speed)
21335 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
21336 outer_code, opno, speed);
21337 return true;
21339 *total = ix86_vec_cost (mode, cost->sse_op);
21341 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21343 if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
21345 *total = cost->add * 2
21346 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21347 outer_code, opno, speed)
21348 + rtx_cost (XEXP (x, 1), mode,
21349 outer_code, opno, speed);
21350 return true;
21352 else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT)
21354 *total = cost->add * 2
21355 + rtx_cost (XEXP (x, 0), mode,
21356 outer_code, opno, speed)
21357 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
21358 outer_code, opno, speed);
21359 return true;
21361 *total = cost->add * 2;
21363 else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
21365 *total = cost->add
21366 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21367 outer_code, opno, speed)
21368 + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
21369 return true;
21371 else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT)
21373 *total = cost->add
21374 + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
21375 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
21376 outer_code, opno, speed);
21377 return true;
21379 else
21380 *total = cost->add;
21381 return false;
21383 case NOT:
21384 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21386 /* (not (xor ...)) can be a single insn in AVX512. */
21387 if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
21388 && (GET_MODE_SIZE (mode) == 64
21389 || (TARGET_AVX512VL
21390 && (GET_MODE_SIZE (mode) == 32
21391 || GET_MODE_SIZE (mode) == 16))))
21393 *total = ix86_vec_cost (mode, cost->sse_op)
21394 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21395 outer_code, opno, speed)
21396 + rtx_cost (XEXP (XEXP (x, 0), 1), mode,
21397 outer_code, opno, speed);
21398 return true;
21401 // vnot is pxor -1.
21402 *total = ix86_vec_cost (mode, cost->sse_op) + 1;
21404 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21405 *total = cost->add * 2;
21406 else
21407 *total = cost->add;
21408 return false;
21410 case NEG:
21411 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21412 *total = cost->sse_op;
21413 else if (X87_FLOAT_MODE_P (mode))
21414 *total = cost->fchs;
21415 else if (FLOAT_MODE_P (mode))
21416 *total = ix86_vec_cost (mode, cost->sse_op);
21417 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21418 *total = ix86_vec_cost (mode, cost->sse_op);
21419 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21420 *total = cost->add * 3;
21421 else
21422 *total = cost->add;
21423 return false;
21425 case COMPARE:
21426 rtx op0, op1;
21427 op0 = XEXP (x, 0);
21428 op1 = XEXP (x, 1);
21429 if (GET_CODE (op0) == ZERO_EXTRACT
21430 && XEXP (op0, 1) == const1_rtx
21431 && CONST_INT_P (XEXP (op0, 2))
21432 && op1 == const0_rtx)
21434 /* This kind of construct is implemented using test[bwl].
21435 Treat it as if we had an AND. */
21436 mode = GET_MODE (XEXP (op0, 0));
21437 *total = (cost->add
21438 + rtx_cost (XEXP (op0, 0), mode, outer_code,
21439 opno, speed)
21440 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
21441 return true;
21444 if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
21446 /* This is overflow detection; count it as a normal compare. */
21447 *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
21448 return true;
21451 rtx geu;
21452 /* Match x
21453 (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
21454 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */
21455 if (mode == CCCmode
21456 && GET_CODE (op0) == NEG
21457 && GET_CODE (geu = XEXP (op0, 0)) == GEU
21458 && REG_P (XEXP (geu, 0))
21459 && (GET_MODE (XEXP (geu, 0)) == CCCmode
21460 || GET_MODE (XEXP (geu, 0)) == CCmode)
21461 && REGNO (XEXP (geu, 0)) == FLAGS_REG
21462 && XEXP (geu, 1) == const0_rtx
21463 && GET_CODE (op1) == LTU
21464 && REG_P (XEXP (op1, 0))
21465 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
21466 && REGNO (XEXP (op1, 0)) == FLAGS_REG
21467 && XEXP (op1, 1) == const0_rtx)
21469 /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */
21470 *total = 0;
21471 return true;
21473 /* Match x
21474 (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
21475 (geu:QI (reg:CCC FLAGS_REG) (const_int 0))) */
21476 if (mode == CCCmode
21477 && GET_CODE (op0) == NEG
21478 && GET_CODE (XEXP (op0, 0)) == LTU
21479 && REG_P (XEXP (XEXP (op0, 0), 0))
21480 && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
21481 && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG
21482 && XEXP (XEXP (op0, 0), 1) == const0_rtx
21483 && GET_CODE (op1) == GEU
21484 && REG_P (XEXP (op1, 0))
21485 && GET_MODE (XEXP (op1, 0)) == CCCmode
21486 && REGNO (XEXP (op1, 0)) == FLAGS_REG
21487 && XEXP (op1, 1) == const0_rtx)
21489 /* This is *x86_cmc. */
21490 if (!speed)
21491 *total = COSTS_N_BYTES (1);
21492 else if (TARGET_SLOW_STC)
21493 *total = COSTS_N_INSNS (2);
21494 else
21495 *total = COSTS_N_INSNS (1);
21496 return true;
21499 if (SCALAR_INT_MODE_P (GET_MODE (op0))
21500 && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
21502 if (op1 == const0_rtx)
21503 *total = cost->add
21504 + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed);
21505 else
21506 *total = 3*cost->add
21507 + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed)
21508 + rtx_cost (op1, GET_MODE (op0), outer_code, opno, speed);
21509 return true;
21512 /* The embedded comparison operand is completely free. */
21513 if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
21514 *total = 0;
21516 return false;
21518 case FLOAT_EXTEND:
21519 if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21520 *total = 0;
21521 else
21522 *total = ix86_vec_cost (mode, cost->addss);
21523 return false;
21525 case FLOAT_TRUNCATE:
21526 if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21527 *total = cost->fadd;
21528 else
21529 *total = ix86_vec_cost (mode, cost->addss);
21530 return false;
21532 case ABS:
21533 /* SSE requires memory load for the constant operand. It may make
21534 sense to account for this. Of course the constant operand may or
21535 may not be reused. */
21536 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21537 *total = cost->sse_op;
21538 else if (X87_FLOAT_MODE_P (mode))
21539 *total = cost->fabs;
21540 else if (FLOAT_MODE_P (mode))
21541 *total = ix86_vec_cost (mode, cost->sse_op);
21542 return false;
21544 case SQRT:
21545 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21546 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
21547 else if (X87_FLOAT_MODE_P (mode))
21548 *total = cost->fsqrt;
21549 else if (FLOAT_MODE_P (mode))
21550 *total = ix86_vec_cost (mode,
21551 mode == SFmode ? cost->sqrtss : cost->sqrtsd);
21552 return false;
21554 case UNSPEC:
21555 if (XINT (x, 1) == UNSPEC_TP)
21556 *total = 0;
21557 else if (XINT (x, 1) == UNSPEC_VTERNLOG)
21559 *total = cost->sse_op;
21560 return true;
21562 else if (XINT (x, 1) == UNSPEC_PTEST)
21564 *total = cost->sse_op;
21565 rtx test_op0 = XVECEXP (x, 0, 0);
21566 if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
21567 return false;
21568 if (GET_CODE (test_op0) == AND)
21570 rtx and_op0 = XEXP (test_op0, 0);
21571 if (GET_CODE (and_op0) == NOT)
21572 and_op0 = XEXP (and_op0, 0);
21573 *total += rtx_cost (and_op0, GET_MODE (and_op0),
21574 AND, 0, speed)
21575 + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
21576 AND, 1, speed);
21578 else
21579 *total = rtx_cost (test_op0, GET_MODE (test_op0),
21580 UNSPEC, 0, speed);
21581 return true;
21583 return false;
21585 case VEC_SELECT:
21586 case VEC_CONCAT:
21587 case VEC_DUPLICATE:
21588 /* ??? Assume all of these vector manipulation patterns are
21589 recognizable. In which case they all pretty much have the
21590 same cost. */
21591 *total = cost->sse_op;
21592 return true;
21593 case VEC_MERGE:
21594 mask = XEXP (x, 2);
21595 /* This is a masked instruction; assume the same cost
21596 as the non-masked variant. */
21597 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
21598 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
21599 else
21600 *total = cost->sse_op;
21601 return true;
21603 case MEM:
21604 /* An insn that accesses memory is slightly more expensive
21605 than one that does not. */
21606 if (speed)
21607 *total += 1;
21608 return false;
21610 case ZERO_EXTRACT:
21611 if (XEXP (x, 1) == const1_rtx
21612 && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND
21613 && GET_MODE (XEXP (x, 2)) == SImode
21614 && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode)
21616 /* Ignore cost of zero extension and masking of last argument. */
21617 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
21618 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
21619 *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed);
21620 return true;
21622 return false;
21624 case IF_THEN_ELSE:
21625 if (TARGET_XOP
21626 && VECTOR_MODE_P (mode)
21627 && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32))
21629 /* vpcmov. */
21630 *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6);
21631 if (!REG_P (XEXP (x, 0)))
21632 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
21633 if (!REG_P (XEXP (x, 1)))
21634 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
21635 if (!REG_P (XEXP (x, 2)))
21636 *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
21637 return true;
21639 else if (TARGET_CMOVE
21640 && SCALAR_INT_MODE_P (mode)
21641 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
21643 /* cmov. */
21644 *total = COSTS_N_INSNS (1);
21645 if (!REG_P (XEXP (x, 0)))
21646 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
21647 if (!REG_P (XEXP (x, 1)))
21648 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
21649 if (!REG_P (XEXP (x, 2)))
21650 *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
21651 return true;
21653 return false;
21655 default:
21656 return false;
21660 #if TARGET_MACHO
21662 static int current_machopic_label_num;
21664 /* Given a symbol name and its associated stub, write out the
21665 definition of the stub. */
21667 void
21668 machopic_output_stub (FILE *file, const char *symb, const char *stub)
21670 unsigned int length;
21671 char *binder_name, *symbol_name, lazy_ptr_name[32];
21672 int label = ++current_machopic_label_num;
21674 /* For 64-bit we shouldn't get here. */
21675 gcc_assert (!TARGET_64BIT);
21677 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
21678 symb = targetm.strip_name_encoding (symb);
21680 length = strlen (stub);
21681 binder_name = XALLOCAVEC (char, length + 32);
21682 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
21684 length = strlen (symb);
21685 symbol_name = XALLOCAVEC (char, length + 32);
21686 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
21688 sprintf (lazy_ptr_name, "L%d$lz", label);
21690 if (MACHOPIC_ATT_STUB)
21691 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
21692 else if (MACHOPIC_PURE)
21693 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
21694 else
21695 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
21697 fprintf (file, "%s:\n", stub);
21698 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21700 if (MACHOPIC_ATT_STUB)
21702 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
21704 else if (MACHOPIC_PURE)
21706 /* PIC stub. */
21707 /* 25-byte PIC stub using "CALL get_pc_thunk". */
21708 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
21709 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
21710 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
21711 label, lazy_ptr_name, label);
21712 fprintf (file, "\tjmp\t*%%ecx\n");
21714 else
21715 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
21717 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
21718 it needs no stub-binding-helper. */
21719 if (MACHOPIC_ATT_STUB)
21720 return;
21722 fprintf (file, "%s:\n", binder_name);
21724 if (MACHOPIC_PURE)
21726 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
21727 fprintf (file, "\tpushl\t%%ecx\n");
21729 else
21730 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
21732 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
21734 /* N.B. Keep the correspondence of these
21735 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
21736 old-pic/new-pic/non-pic stubs; altering this will break
21737 compatibility with existing dylibs. */
21738 if (MACHOPIC_PURE)
21740 /* 25-byte PIC stub using "CALL get_pc_thunk". */
21741 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
21743 else
21744 /* 16-byte -mdynamic-no-pic stub. */
21745 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
21747 fprintf (file, "%s:\n", lazy_ptr_name);
21748 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21749 fprintf (file, ASM_LONG "%s\n", binder_name);
21751 #endif /* TARGET_MACHO */
21753 /* Order the registers for register allocator. */
21755 void
21756 x86_order_regs_for_local_alloc (void)
21758 int pos = 0;
21759 int i;
21761 /* First allocate the local general purpose registers. */
21762 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
21763 if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
21764 reg_alloc_order [pos++] = i;
21766 /* Global general purpose registers. */
21767 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
21768 if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
21769 reg_alloc_order [pos++] = i;
21771 /* x87 registers come first in case we are doing FP math
21772 using them. */
21773 if (!TARGET_SSE_MATH)
21774 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
21775 reg_alloc_order [pos++] = i;
21777 /* SSE registers. */
21778 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
21779 reg_alloc_order [pos++] = i;
21780 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
21781 reg_alloc_order [pos++] = i;
21783 /* Extended REX SSE registers. */
21784 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
21785 reg_alloc_order [pos++] = i;
21787 /* Mask register. */
21788 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
21789 reg_alloc_order [pos++] = i;
21791 /* x87 registers. */
21792 if (TARGET_SSE_MATH)
21793 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
21794 reg_alloc_order [pos++] = i;
21796 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
21797 reg_alloc_order [pos++] = i;
21799 /* Initialize the rest of the array, as we do not allocate some
21800 registers at all. */
21801 while (pos < FIRST_PSEUDO_REGISTER)
21802 reg_alloc_order [pos++] = 0;
21805 static bool
21806 ix86_ms_bitfield_layout_p (const_tree record_type)
21808 return ((TARGET_MS_BITFIELD_LAYOUT
21809 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
21810 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
21813 /* Returns an expression indicating where the this parameter is
21814 located on entry to the FUNCTION. */
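/* For illustration (a sketch of the usual 64-bit case, not taken from this
   file): under the SysV ABI the first integer argument register is %rdi, so
   `this' normally arrives there; when the function returns an aggregate in
   memory, the hidden return pointer occupies that first slot and `this'
   moves to %rsi.  Under the MS ABI the corresponding pair is %rcx / %rdx.  */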
21816 static rtx
21817 x86_this_parameter (tree function)
21819 tree type = TREE_TYPE (function);
21820 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
21821 int nregs;
21823 if (TARGET_64BIT)
21825 const int *parm_regs;
21827 if (ix86_function_type_abi (type) == MS_ABI)
21828 parm_regs = x86_64_ms_abi_int_parameter_registers;
21829 else
21830 parm_regs = x86_64_int_parameter_registers;
21831 return gen_rtx_REG (Pmode, parm_regs[aggr]);
21834 nregs = ix86_function_regparm (type, function);
21836 if (nregs > 0 && !stdarg_p (type))
21838 int regno;
21839 unsigned int ccvt = ix86_get_callcvt (type);
21841 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
21842 regno = aggr ? DX_REG : CX_REG;
21843 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
21845 regno = CX_REG;
21846 if (aggr)
21847 return gen_rtx_MEM (SImode,
21848 plus_constant (Pmode, stack_pointer_rtx, 4));
21850 else
21852 regno = AX_REG;
21853 if (aggr)
21855 regno = DX_REG;
21856 if (nregs == 1)
21857 return gen_rtx_MEM (SImode,
21858 plus_constant (Pmode,
21859 stack_pointer_rtx, 4));
21862 return gen_rtx_REG (SImode, regno);
21865 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
21866 aggr ? 8 : 4));
21869 /* Determine whether x86_output_mi_thunk can succeed. */
21871 static bool
21872 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
21873 const_tree function)
21875 /* 64-bit can handle anything. */
21876 if (TARGET_64BIT)
21877 return true;
21879 /* For 32-bit, everything's fine if we have one free register. */
21880 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
21881 return true;
21883 /* Need a free register for vcall_offset. */
21884 if (vcall_offset)
21885 return false;
21887 /* Need a free register for GOT references. */
21888 if (flag_pic && !targetm.binds_local_p (function))
21889 return false;
21891 /* Otherwise ok. */
21892 return true;
21895 /* Output the assembler code for a thunk function. THUNK_DECL is the
21896 declaration for the thunk function itself, FUNCTION is the decl for
21897 the target function. DELTA is an immediate constant offset to be
21898 added to THIS. If VCALL_OFFSET is nonzero, the word at
21899 *(*this + vcall_offset) should be added to THIS. */
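/* Roughly, the emitted thunk behaves like the following C sketch (names and
   casts are illustrative only):

     this = (char *) this + delta;
     if (vcall_offset)
       this = (char *) this
              + *(ptrdiff_t *) (*(char **) this + vcall_offset);
     return FUNCTION (this, ...);    // emitted as a tail call
*/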
21901 static void
21902 x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
21903 HOST_WIDE_INT vcall_offset, tree function)
21905 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
21906 rtx this_param = x86_this_parameter (function);
21907 rtx this_reg, tmp, fnaddr;
21908 unsigned int tmp_regno;
21909 rtx_insn *insn;
21910 int saved_flag_force_indirect_call = flag_force_indirect_call;
21912 if (TARGET_64BIT)
21913 tmp_regno = R10_REG;
21914 else
21916 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
21917 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
21918 tmp_regno = AX_REG;
21919 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
21920 tmp_regno = DX_REG;
21921 else
21922 tmp_regno = CX_REG;
21924 if (flag_pic)
21925 flag_force_indirect_call = 0;
21928 emit_note (NOTE_INSN_PROLOGUE_END);
21930 /* If CET is enabled, insert an ENDBR instruction. */
21931 if ((flag_cf_protection & CF_BRANCH))
21932 emit_insn (gen_nop_endbr ());
21934 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
21935 pull it in now and let DELTA benefit. */
21936 if (REG_P (this_param))
21937 this_reg = this_param;
21938 else if (vcall_offset)
21940 /* Put the this parameter into %eax. */
21941 this_reg = gen_rtx_REG (Pmode, AX_REG);
21942 emit_move_insn (this_reg, this_param);
21944 else
21945 this_reg = NULL_RTX;
21947 /* Adjust the this parameter by a fixed constant. */
21948 if (delta)
21950 rtx delta_rtx = GEN_INT (delta);
21951 rtx delta_dst = this_reg ? this_reg : this_param;
21953 if (TARGET_64BIT)
21955 if (!x86_64_general_operand (delta_rtx, Pmode))
21957 tmp = gen_rtx_REG (Pmode, tmp_regno);
21958 emit_move_insn (tmp, delta_rtx);
21959 delta_rtx = tmp;
21963 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
21966 /* Adjust the this parameter by a value stored in the vtable. */
21967 if (vcall_offset)
21969 rtx vcall_addr, vcall_mem, this_mem;
21971 tmp = gen_rtx_REG (Pmode, tmp_regno);
21973 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
21974 if (Pmode != ptr_mode)
21975 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
21976 emit_move_insn (tmp, this_mem);
21978 /* Adjust the this parameter. */
21979 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
21980 if (TARGET_64BIT
21981 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
21983 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
21984 emit_move_insn (tmp2, GEN_INT (vcall_offset));
21985 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
21988 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
21989 if (Pmode != ptr_mode)
21990 emit_insn (gen_addsi_1_zext (this_reg,
21991 gen_rtx_REG (ptr_mode,
21992 REGNO (this_reg)),
21993 vcall_mem));
21994 else
21995 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
21998 /* If necessary, drop THIS back to its stack slot. */
21999 if (this_reg && this_reg != this_param)
22000 emit_move_insn (this_param, this_reg);
22002 fnaddr = XEXP (DECL_RTL (function), 0);
22003 if (TARGET_64BIT)
22005 if (!flag_pic || targetm.binds_local_p (function)
22006 || TARGET_PECOFF)
22008 else
22010 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
22011 tmp = gen_rtx_CONST (Pmode, tmp);
22012 fnaddr = gen_const_mem (Pmode, tmp);
22015 else
22017 if (!flag_pic || targetm.binds_local_p (function))
22019 #if TARGET_MACHO
22020 else if (TARGET_MACHO)
22022 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
22023 fnaddr = XEXP (fnaddr, 0);
22025 #endif /* TARGET_MACHO */
22026 else
22028 tmp = gen_rtx_REG (Pmode, CX_REG);
22029 output_set_got (tmp, NULL_RTX);
22031 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
22032 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
22033 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
22034 fnaddr = gen_const_mem (Pmode, fnaddr);
22038 /* Our sibling call patterns do not allow memories, because we have no
22039 predicate that can distinguish between frame and non-frame memory.
22040 For our purposes here, we can get away with (ab)using a jump pattern,
22041 because we're going to do no optimization. */
22042 if (MEM_P (fnaddr))
22044 if (sibcall_insn_operand (fnaddr, word_mode))
22046 fnaddr = XEXP (DECL_RTL (function), 0);
22047 tmp = gen_rtx_MEM (QImode, fnaddr);
22048 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
22049 tmp = emit_call_insn (tmp);
22050 SIBLING_CALL_P (tmp) = 1;
22052 else
22053 emit_jump_insn (gen_indirect_jump (fnaddr));
22055 else
22057 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
22059 // CM_LARGE_PIC always uses pseudo PIC register which is
22060 // uninitialized. Since FUNCTION is local and calling it
22061 // doesn't go through PLT, we use scratch register %r11 as
22062 // PIC register and initialize it here.
22063 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
22064 ix86_init_large_pic_reg (tmp_regno);
22065 fnaddr = legitimize_pic_address (fnaddr,
22066 gen_rtx_REG (Pmode, tmp_regno));
22069 if (!sibcall_insn_operand (fnaddr, word_mode))
22071 tmp = gen_rtx_REG (word_mode, tmp_regno);
22072 if (GET_MODE (fnaddr) != word_mode)
22073 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
22074 emit_move_insn (tmp, fnaddr);
22075 fnaddr = tmp;
22078 tmp = gen_rtx_MEM (QImode, fnaddr);
22079 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
22080 tmp = emit_call_insn (tmp);
22081 SIBLING_CALL_P (tmp) = 1;
22083 emit_barrier ();
22085 /* Emit just enough of rest_of_compilation to get the insns emitted. */
22086 insn = get_insns ();
22087 shorten_branches (insn);
22088 assemble_start_function (thunk_fndecl, fnname);
22089 final_start_function (insn, file, 1);
22090 final (insn, file, 1);
22091 final_end_function ();
22092 assemble_end_function (thunk_fndecl, fnname);
22094 flag_force_indirect_call = saved_flag_force_indirect_call;
22097 static void
22098 x86_file_start (void)
22100 default_file_start ();
22101 if (TARGET_16BIT)
22102 fputs ("\t.code16gcc\n", asm_out_file);
22103 #if TARGET_MACHO
22104 darwin_file_start ();
22105 #endif
22106 if (X86_FILE_START_VERSION_DIRECTIVE)
22107 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
22108 if (X86_FILE_START_FLTUSED)
22109 fputs ("\t.global\t__fltused\n", asm_out_file);
22110 if (ix86_asm_dialect == ASM_INTEL)
22111 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
22114 int
22115 x86_field_alignment (tree type, int computed)
22117 machine_mode mode;
22119 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
22120 return computed;
22121 if (TARGET_IAMCU)
22122 return iamcu_alignment (type, computed);
22123 type = strip_array_types (type);
22124 mode = TYPE_MODE (type);
22125 if (mode == DFmode || mode == DCmode
22126 || GET_MODE_CLASS (mode) == MODE_INT
22127 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
22129 if (TYPE_ATOMIC (type) && computed > 32)
22131 static bool warned;
22133 if (!warned && warn_psabi)
22135 const char *url
22136 = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";
22138 warned = true;
22139 inform (input_location, "the alignment of %<_Atomic %T%> "
22140 "fields changed in %{GCC 11.1%}",
22141 TYPE_MAIN_VARIANT (type), url);
22144 else
22145 return MIN (32, computed);
22147 return computed;
22150 /* Print call to TARGET to FILE. */
22152 static void
22153 x86_print_call_or_nop (FILE *file, const char *target)
22155 if (flag_nop_mcount || !strcmp (target, "nop"))
22156 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
22157 fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
22158 else
22159 fprintf (file, "1:\tcall\t%s\n", target);
22162 static bool
22163 current_fentry_name (const char **name)
22165 tree attr = lookup_attribute ("fentry_name",
22166 DECL_ATTRIBUTES (current_function_decl));
22167 if (!attr)
22168 return false;
22169 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
22170 return true;
22173 static bool
22174 current_fentry_section (const char **name)
22176 tree attr = lookup_attribute ("fentry_section",
22177 DECL_ATTRIBUTES (current_function_decl));
22178 if (!attr)
22179 return false;
22180 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
22181 return true;
22184 /* Output assembler code to FILE to increment profiler label # LABELNO
22185 for profiling a function entry. */
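/* For illustration (typical GNU/Linux output, assuming a configuration where
   MCOUNT_NAME is "mcount"): compiling with -pg places

     1:  call  mcount

   after the prologue, while -mfentry calls __fentry__ before the prologue
   instead; the "1:" label is what the __mcount_loc section emitted further
   down refers back to.  */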
22186 void
22187 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
22189 if (cfun->machine->insn_queued_at_entrance)
22191 if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
22192 fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
22193 unsigned int patch_area_size
22194 = crtl->patch_area_size - crtl->patch_area_entry;
22195 if (patch_area_size)
22196 ix86_output_patchable_area (patch_area_size,
22197 crtl->patch_area_entry == 0);
22200 const char *mcount_name = MCOUNT_NAME;
22202 if (current_fentry_name (&mcount_name))
22204 else if (fentry_name)
22205 mcount_name = fentry_name;
22206 else if (flag_fentry)
22207 mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
22209 if (TARGET_64BIT)
22211 #ifndef NO_PROFILE_COUNTERS
22212 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
22213 #endif
22215 if (!TARGET_PECOFF)
22217 switch (ix86_cmodel)
22219 case CM_LARGE:
22220 /* NB: R10 is caller-saved. Although it can be used as a
22221 static chain register, it is preserved when calling
22222 mcount for nested functions. */
22223 fprintf (file, "1:\tmovabsq\t$%s, %%r10\n\tcall\t*%%r10\n",
22224 mcount_name);
22225 break;
22226 case CM_LARGE_PIC:
22227 #ifdef NO_PROFILE_COUNTERS
22228 fprintf (file, "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
22229 fprintf (file, "\tleaq\t1b(%%rip), %%r10\n");
22230 fprintf (file, "\taddq\t%%r11, %%r10\n");
22231 fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
22232 fprintf (file, "\taddq\t%%r11, %%r10\n");
22233 fprintf (file, "\tcall\t*%%r10\n");
22234 #else
22235 sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
22236 #endif
22237 break;
22238 case CM_SMALL_PIC:
22239 case CM_MEDIUM_PIC:
22240 if (!ix86_direct_extern_access)
22242 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
22243 break;
22245 /* fall through */
22246 default:
22247 x86_print_call_or_nop (file, mcount_name);
22248 break;
22251 else
22252 x86_print_call_or_nop (file, mcount_name);
22254 else if (flag_pic)
22256 #ifndef NO_PROFILE_COUNTERS
22257 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
22258 LPREFIX, labelno);
22259 #endif
22260 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
22262 else
22264 #ifndef NO_PROFILE_COUNTERS
22265 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
22266 LPREFIX, labelno);
22267 #endif
22268 x86_print_call_or_nop (file, mcount_name);
22271 if (flag_record_mcount
22272 || lookup_attribute ("fentry_section",
22273 DECL_ATTRIBUTES (current_function_decl)))
22275 const char *sname = "__mcount_loc";
22277 if (current_fentry_section (&sname))
22279 else if (fentry_section)
22280 sname = fentry_section;
22282 fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
22283 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
22284 fprintf (file, "\t.previous\n");
22288 /* We don't have exact information about the insn sizes, but we may assume
22289 quite safely that we are informed about all 1 byte insns and memory
22290 address sizes. This is enough to eliminate unnecessary padding in
22291 99% of cases. */
22293 int
22294 ix86_min_insn_size (rtx_insn *insn)
22296 int l = 0, len;
22298 if (!INSN_P (insn) || !active_insn_p (insn))
22299 return 0;
22301 /* Discard alignments we've emitted and jump instructions. */
22302 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
22303 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
22304 return 0;
22306 /* Important case - calls are always 5 bytes.
22307 It is common to have many calls in a row. */
22308 if (CALL_P (insn)
22309 && symbolic_reference_mentioned_p (PATTERN (insn))
22310 && !SIBLING_CALL_P (insn))
22311 return 5;
22312 len = get_attr_length (insn);
22313 if (len <= 1)
22314 return 1;
22316 /* For normal instructions we rely on get_attr_length being exact,
22317 with a few exceptions. */
22318 if (!JUMP_P (insn))
22320 enum attr_type type = get_attr_type (insn);
22322 switch (type)
22324 case TYPE_MULTI:
22325 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
22326 || asm_noperands (PATTERN (insn)) >= 0)
22327 return 0;
22328 break;
22329 case TYPE_OTHER:
22330 case TYPE_FCMP:
22331 break;
22332 default:
22333 /* Otherwise trust get_attr_length. */
22334 return len;
22337 l = get_attr_length_address (insn);
22338 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
22339 l = 4;
22341 if (l)
22342 return 1+l;
22343 else
22344 return 2;
22347 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
22349 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
22350 window. */
22352 static void
22353 ix86_avoid_jump_mispredicts (void)
22355 rtx_insn *insn, *start = get_insns ();
22356 int nbytes = 0, njumps = 0;
22357 bool isjump = false;
22359 /* Look for all minimal intervals of instructions containing 4 jumps.
22360 The intervals are bounded by START and INSN. NBYTES is the total
22361 size of instructions in the interval including INSN and not including
22362 START. When NBYTES is smaller than 16 bytes, it is possible
22363 that the end of START and INSN end up in the same 16-byte page.
22365 The smallest offset in the page INSN can start is the case where START
22366 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
22367 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
22369 Don't consider asm goto as jump, while it can contain a jump, it doesn't
22370 have to, control transfer to label(s) can be performed through other
22371 means, and also we estimate minimum length of all asm stmts as 0. */
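/* Worked example of the formula used below: if NBYTES is 12 when the
   interval's final jump INSN is reached and INSN is itself 2 bytes long,
   the pad emitted before INSN requests up to 15 - 12 + 2 = 5 bytes,
   enough to push INSN out of the 16-byte window it would otherwise share
   with the three earlier jumps.  */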
22372 for (insn = start; insn; insn = NEXT_INSN (insn))
22374 int min_size;
22376 if (LABEL_P (insn))
22378 align_flags alignment = label_to_alignment (insn);
22379 int align = alignment.levels[0].log;
22380 int max_skip = alignment.levels[0].maxskip;
22382 if (max_skip > 15)
22383 max_skip = 15;
22384 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
22385 already in the current 16 byte page, because otherwise
22386 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
22387 bytes to reach 16 byte boundary. */
22388 if (align <= 0
22389 || (align <= 3 && max_skip != (1 << align) - 1))
22390 max_skip = 0;
22391 if (dump_file)
22392 fprintf (dump_file, "Label %i with max_skip %i\n",
22393 INSN_UID (insn), max_skip);
22394 if (max_skip)
22396 while (nbytes + max_skip >= 16)
22398 start = NEXT_INSN (start);
22399 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
22400 || CALL_P (start))
22401 njumps--, isjump = true;
22402 else
22403 isjump = false;
22404 nbytes -= ix86_min_insn_size (start);
22407 continue;
22410 min_size = ix86_min_insn_size (insn);
22411 nbytes += min_size;
22412 if (dump_file)
22413 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
22414 INSN_UID (insn), min_size);
22415 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
22416 || CALL_P (insn))
22417 njumps++;
22418 else
22419 continue;
22421 while (njumps > 3)
22423 start = NEXT_INSN (start);
22424 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
22425 || CALL_P (start))
22426 njumps--, isjump = true;
22427 else
22428 isjump = false;
22429 nbytes -= ix86_min_insn_size (start);
22431 gcc_assert (njumps >= 0);
22432 if (dump_file)
22433 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
22434 INSN_UID (start), INSN_UID (insn), nbytes);
22436 if (njumps == 3 && isjump && nbytes < 16)
22438 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
22440 if (dump_file)
22441 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
22442 INSN_UID (insn), padsize);
22443 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
22447 #endif
22449 /* AMD Athlon works faster
22450 when RET is not the destination of a conditional jump or directly
22451 preceded by another jump instruction. We avoid the penalty by
22452 inserting a NOP just before the RET instructions in such cases. */
22453 static void
22454 ix86_pad_returns (void)
22456 edge e;
22457 edge_iterator ei;
22459 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22461 basic_block bb = e->src;
22462 rtx_insn *ret = BB_END (bb);
22463 rtx_insn *prev;
22464 bool replace = false;
22466 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
22467 || optimize_bb_for_size_p (bb))
22468 continue;
22469 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
22470 if (active_insn_p (prev) || LABEL_P (prev))
22471 break;
22472 if (prev && LABEL_P (prev))
22474 edge e;
22475 edge_iterator ei;
22477 FOR_EACH_EDGE (e, ei, bb->preds)
22478 if (EDGE_FREQUENCY (e) && e->src->index >= 0
22479 && !(e->flags & EDGE_FALLTHRU))
22481 replace = true;
22482 break;
22485 if (!replace)
22487 prev = prev_active_insn (ret);
22488 if (prev
22489 && ((JUMP_P (prev) && any_condjump_p (prev))
22490 || CALL_P (prev)))
22491 replace = true;
22492 /* Empty functions get branch mispredict even when
22493 the jump destination is not visible to us. */
22494 if (!prev && !optimize_function_for_size_p (cfun))
22495 replace = true;
22497 if (replace)
22499 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
22500 delete_insn (ret);
22505 /* Count the minimum number of instructions in BB. Return 4 if the
22506 number of instructions >= 4. */
22508 static int
22509 ix86_count_insn_bb (basic_block bb)
22511 rtx_insn *insn;
22512 int insn_count = 0;
22514 /* Count number of instructions in this block. Return 4 if the number
22515 of instructions >= 4. */
22516 FOR_BB_INSNS (bb, insn)
22518 /* This only happens in exit blocks. */
22519 if (JUMP_P (insn)
22520 && ANY_RETURN_P (PATTERN (insn)))
22521 break;
22523 if (NONDEBUG_INSN_P (insn)
22524 && GET_CODE (PATTERN (insn)) != USE
22525 && GET_CODE (PATTERN (insn)) != CLOBBER)
22527 insn_count++;
22528 if (insn_count >= 4)
22529 return insn_count;
22533 return insn_count;
22537 /* Count the minimum number of instructions in code path in BB.
22538 Return 4 if the number of instructions >= 4. */
22540 static int
22541 ix86_count_insn (basic_block bb)
22543 edge e;
22544 edge_iterator ei;
22545 int min_prev_count;
22547 /* Only bother counting instructions along paths with no
22548 more than 2 basic blocks between entry and exit. Given
22549 that BB has an edge to exit, determine if a predecessor
22550 of BB has an edge from entry. If so, compute the number
22551 of instructions in the predecessor block. If there
22552 happen to be multiple such blocks, compute the minimum. */
22553 min_prev_count = 4;
22554 FOR_EACH_EDGE (e, ei, bb->preds)
22556 edge prev_e;
22557 edge_iterator prev_ei;
22559 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
22561 min_prev_count = 0;
22562 break;
22564 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
22566 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
22568 int count = ix86_count_insn_bb (e->src);
22569 if (count < min_prev_count)
22570 min_prev_count = count;
22571 break;
22576 if (min_prev_count < 4)
22577 min_prev_count += ix86_count_insn_bb (bb);
22579 return min_prev_count;
22582 /* Pad short function to 4 instructions. */
22584 static void
22585 ix86_pad_short_function (void)
22587 edge e;
22588 edge_iterator ei;
22590 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22592 rtx_insn *ret = BB_END (e->src);
22593 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
22595 int insn_count = ix86_count_insn (e->src);
22597 /* Pad short function. */
22598 if (insn_count < 4)
22600 rtx_insn *insn = ret;
22602 /* Find epilogue. */
22603 while (insn
22604 && (!NOTE_P (insn)
22605 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
22606 insn = PREV_INSN (insn);
22608 if (!insn)
22609 insn = ret;
22611 /* Two NOPs count as one instruction. */
22612 insn_count = 2 * (4 - insn_count);
22613 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
22619 /* Fix up a Windows system unwinder issue. If an EH region falls through into
22620 the epilogue, the Windows system unwinder will apply epilogue logic and
22621 produce incorrect offsets. This can be avoided by adding a nop between
22622 the last insn that can throw and the first insn of the epilogue. */
22624 static void
22625 ix86_seh_fixup_eh_fallthru (void)
22627 edge e;
22628 edge_iterator ei;
22630 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22632 rtx_insn *insn, *next;
22634 /* Find the beginning of the epilogue. */
22635 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
22636 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
22637 break;
22638 if (insn == NULL)
22639 continue;
22641 /* We only care about preceding insns that can throw. */
22642 insn = prev_active_insn (insn);
22643 if (insn == NULL || !can_throw_internal (insn))
22644 continue;
22646 /* Do not separate calls from their debug information. */
22647 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
22648 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
22649 insn = next;
22650 else
22651 break;
22653 emit_insn_after (gen_nops (const1_rtx), insn);
22656 /* Split a vector load from a parm_decl into elemental loads to avoid
22657 STLF stalls. */
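/* For illustration (approximate assembly, assuming a V2DF parameter that
   the caller spilled to the stack in two 8-byte stores): a load such as

     movapd  16(%rsp), %xmm0

   is split into two scalar halves so that each half can be forwarded from
   its own store:

     movsd   16(%rsp), %xmm0
     movhpd  24(%rsp), %xmm0
*/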
22658 static void
22659 ix86_split_stlf_stall_load ()
22661 rtx_insn* insn, *start = get_insns ();
22662 unsigned window = 0;
22664 for (insn = start; insn; insn = NEXT_INSN (insn))
22666 if (!NONDEBUG_INSN_P (insn))
22667 continue;
22668 window++;
22669 /* Inserting 64 vaddps %xmm18, %xmm19, %xmm20 instructions (independent
22670 of each other, just to keep the pipeline busy) before the stalled load
22671 makes the STLF-stall case as fast as the no-stall case on CLX.
22672 Since the CFG is freed before machine_reorg, just do a rough
22673 calculation of the window according to the layout. */
22674 if (window > (unsigned) x86_stlf_window_ninsns)
22675 return;
22677 if (any_uncondjump_p (insn)
22678 || ANY_RETURN_P (PATTERN (insn))
22679 || CALL_P (insn))
22680 return;
22682 rtx set = single_set (insn);
22683 if (!set)
22684 continue;
22685 rtx src = SET_SRC (set);
22686 if (!MEM_P (src)
22687 /* Only handle V2DFmode load since it doesn't need any scratch
22688 register. */
22689 || GET_MODE (src) != E_V2DFmode
22690 || !MEM_EXPR (src)
22691 || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL)
22692 continue;
22694 rtx zero = CONST0_RTX (V2DFmode);
22695 rtx dest = SET_DEST (set);
22696 rtx m = adjust_address (src, DFmode, 0);
22697 rtx loadlpd = gen_sse2_loadlpd (dest, zero, m);
22698 emit_insn_before (loadlpd, insn);
22699 m = adjust_address (src, DFmode, 8);
22700 rtx loadhpd = gen_sse2_loadhpd (dest, dest, m);
22701 if (dump_file && (dump_flags & TDF_DETAILS))
22703 fputs ("Due to potential STLF stall, split instruction:\n",
22704 dump_file);
22705 print_rtl_single (dump_file, insn);
22706 fputs ("To:\n", dump_file);
22707 print_rtl_single (dump_file, loadlpd);
22708 print_rtl_single (dump_file, loadhpd);
22710 PATTERN (insn) = loadhpd;
22711 INSN_CODE (insn) = -1;
22712 gcc_assert (recog_memoized (insn) != -1);
22716 /* Implement machine specific optimizations. We implement padding of returns
22717 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
22718 static void
22719 ix86_reorg (void)
22721 /* We are freeing block_for_insn in the toplev to keep compatibility
22722 with old MDEP_REORGS that are not CFG based. Recompute it now. */
22723 compute_bb_for_insn ();
22725 if (TARGET_SEH && current_function_has_exception_handlers ())
22726 ix86_seh_fixup_eh_fallthru ();
22728 if (optimize && optimize_function_for_speed_p (cfun))
22730 if (TARGET_SSE2)
22731 ix86_split_stlf_stall_load ();
22732 if (TARGET_PAD_SHORT_FUNCTION)
22733 ix86_pad_short_function ();
22734 else if (TARGET_PAD_RETURNS)
22735 ix86_pad_returns ();
22736 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
22737 if (TARGET_FOUR_JUMP_LIMIT)
22738 ix86_avoid_jump_mispredicts ();
22739 #endif
22743 /* Return nonzero when a QImode register that must be encoded via a REX
22744 prefix is used. */
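/* For example (illustrative, not from this file): a byte operand placed in
   %esi or %r10 must be encoded as %sil or %r10b, which requires a REX
   prefix, whereas the legacy byte registers %al, %bl, %cl and %dl do not.  */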
22745 bool
22746 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
22748 int i;
22749 extract_insn_cached (insn);
22750 for (i = 0; i < recog_data.n_operands; i++)
22751 if (GENERAL_REG_P (recog_data.operand[i])
22752 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
22753 return true;
22754 return false;
22757 /* Return true when INSN mentions register that must be encoded using REX
22758 prefix. */
22759 bool
22760 x86_extended_reg_mentioned_p (rtx insn)
22762 subrtx_iterator::array_type array;
22763 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
22765 const_rtx x = *iter;
22766 if (REG_P (x)
22767 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
22768 return true;
22770 return false;
22773 /* If profitable, negate (without causing overflow) integer constant
22774 of mode MODE at location LOC. Return true in this case. */
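/* Worked example: "addl $-4, %eax" becomes "subl $4, %eax".  And because an
   imm8 sign-extends to the range -128..127, "addl $128, %eax" (which would
   need a 4-byte immediate) becomes "subl $-128, %eax" (1-byte immediate),
   while a constant of -128 itself is deliberately left alone.  */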
22775 bool
22776 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
22778 HOST_WIDE_INT val;
22780 if (!CONST_INT_P (*loc))
22781 return false;
22783 switch (mode)
22785 case E_DImode:
22786 /* DImode x86_64 constants must fit in 32 bits. */
22787 gcc_assert (x86_64_immediate_operand (*loc, mode));
22789 mode = SImode;
22790 break;
22792 case E_SImode:
22793 case E_HImode:
22794 case E_QImode:
22795 break;
22797 default:
22798 gcc_unreachable ();
22801 /* Avoid overflows. */
22802 if (mode_signbit_p (mode, *loc))
22803 return false;
22805 val = INTVAL (*loc);
22807 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
22808 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
22809 if ((val < 0 && val != -128)
22810 || val == 128)
22812 *loc = GEN_INT (-val);
22813 return true;
22816 return false;
22819 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
22820 optabs would emit if we didn't have TFmode patterns. */
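/* Roughly equivalent C for the sequence emitted below (a sketch using a
   64-bit input and double output purely as an example):

     if ((long long) in >= 0)
       out = (double) in;                    // fits as a signed value
     else
       {
         unsigned long long half = (in >> 1) | (in & 1);   // keep sticky bit
         out = (double) half;
         out = out + out;                    // scale back up
       }
*/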
22822 void
22823 x86_emit_floatuns (rtx operands[2])
22825 rtx_code_label *neglab, *donelab;
22826 rtx i0, i1, f0, in, out;
22827 machine_mode mode, inmode;
22829 inmode = GET_MODE (operands[1]);
22830 gcc_assert (inmode == SImode || inmode == DImode);
22832 out = operands[0];
22833 in = force_reg (inmode, operands[1]);
22834 mode = GET_MODE (out);
22835 neglab = gen_label_rtx ();
22836 donelab = gen_label_rtx ();
22837 f0 = gen_reg_rtx (mode);
22839 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
22841 expand_float (out, in, 0);
22843 emit_jump_insn (gen_jump (donelab));
22844 emit_barrier ();
22846 emit_label (neglab);
22848 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
22849 1, OPTAB_DIRECT);
22850 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
22851 1, OPTAB_DIRECT);
22852 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
22854 expand_float (f0, i0, 0);
22856 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
22858 emit_label (donelab);
22861 /* Return the diagnostic message string if conversion from FROMTYPE to
22862 TOTYPE is not allowed, NULL otherwise. */
22864 static const char *
22865 ix86_invalid_conversion (const_tree fromtype, const_tree totype)
22867 machine_mode from_mode = element_mode (fromtype);
22868 machine_mode to_mode = element_mode (totype);
22870 if (!TARGET_SSE2 && from_mode != to_mode)
22872 /* Do not allow conversions to/from BFmode/HFmode scalar types
22873 when TARGET_SSE2 is not available. */
22874 if (from_mode == BFmode)
22875 return N_("invalid conversion from type %<__bf16%> "
22876 "without option %<-msse2%>");
22877 if (from_mode == HFmode)
22878 return N_("invalid conversion from type %<_Float16%> "
22879 "without option %<-msse2%>");
22880 if (to_mode == BFmode)
22881 return N_("invalid conversion to type %<__bf16%> "
22882 "without option %<-msse2%>");
22883 if (to_mode == HFmode)
22884 return N_("invalid conversion to type %<_Float16%> "
22885 "without option %<-msse2%>");
22888 /* Warn about silent implicit conversion between __bf16 and short,
22889 since __bfloat16 was redefined from a typedef of short to the real
22890 __bf16 type in GCC 13. */
22891 if (element_mode (fromtype) != element_mode (totype)
22892 && (TARGET_AVX512BF16 || TARGET_AVXNECONVERT))
22894 /* Warn for silent implicit conversion where user may expect
22895 a bitcast. */
22896 if ((TYPE_MODE (fromtype) == BFmode
22897 && TYPE_MODE (totype) == HImode)
22898 || (TYPE_MODE (totype) == BFmode
22899 && TYPE_MODE (fromtype) == HImode))
22900 warning (0, "%<__bfloat16%> is redefined from typedef %<short%> "
22901 "to real %<__bf16%> since GCC 13.1, be careful of "
22902 "implicit conversion between %<__bf16%> and %<short%>; "
22903 "an explicit bitcast may be needed here");
22906 /* Conversion allowed. */
22907 return NULL;
22910 /* Return the diagnostic message string if the unary operation OP is
22911 not permitted on TYPE, NULL otherwise. */
22913 static const char *
22914 ix86_invalid_unary_op (int op, const_tree type)
22916 machine_mode mmode = element_mode (type);
22917 /* Reject all single-operand operations on BFmode/HFmode except for &
22918 when TARGET_SSE2 is not available. */
22919 if (!TARGET_SSE2 && op != ADDR_EXPR)
22921 if (mmode == BFmode)
22922 return N_("operation not permitted on type %<__bf16%> "
22923 "without option %<-msse2%>");
22924 if (mmode == HFmode)
22925 return N_("operation not permitted on type %<_Float16%> "
22926 "without option %<-msse2%>");
22929 /* Operation allowed. */
22930 return NULL;
22933 /* Return the diagnostic message string if the binary operation OP is
22934 not permitted on TYPE1 and TYPE2, NULL otherwise. */
22936 static const char *
22937 ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
22938 const_tree type2)
22940 machine_mode type1_mode = element_mode (type1);
22941 machine_mode type2_mode = element_mode (type2);
22942 /* Reject all 2-operand operations on BFmode or HFmode
22943 when TARGET_SSE2 is not available. */
22944 if (!TARGET_SSE2)
22946 if (type1_mode == BFmode || type2_mode == BFmode)
22947 return N_("operation not permitted on type %<__bf16%> "
22948 "without option %<-msse2%>");
22950 if (type1_mode == HFmode || type2_mode == HFmode)
22951 return N_("operation not permitted on type %<_Float16%> "
22952 "without option %<-msse2%>");
22955 /* Operation allowed. */
22956 return NULL;
22960 /* Target hook for scalar_mode_supported_p. */
22961 static bool
22962 ix86_scalar_mode_supported_p (scalar_mode mode)
22964 if (DECIMAL_FLOAT_MODE_P (mode))
22965 return default_decimal_float_supported_p ();
22966 else if (mode == TFmode)
22967 return true;
22968 else if (mode == HFmode || mode == BFmode)
22969 return true;
22970 else
22971 return default_scalar_mode_supported_p (mode);
22974 /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
22975 if MODE is HFmode, and punt to the generic implementation otherwise. */
22977 static bool
22978 ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
22980 /* NB: Always return TRUE for HFmode so that the _Float16 type will
22981 be defined by the C front-end for AVX512FP16 intrinsics. We will
22982 issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
22983 enabled. */
22984 return ((mode == HFmode || mode == BFmode)
22985 ? true
22986 : default_libgcc_floating_mode_supported_p (mode));
22989 /* Implements target hook vector_mode_supported_p. */
22990 static bool
22991 ix86_vector_mode_supported_p (machine_mode mode)
22993 /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
22994 either. */
22995 if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode)
22996 return false;
22997 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
22998 return true;
22999 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
23000 return true;
23001 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
23002 return true;
23003 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
23004 return true;
23005 if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
23006 && VALID_MMX_REG_MODE (mode))
23007 return true;
23008 if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
23009 && VALID_MMX_REG_MODE_3DNOW (mode))
23010 return true;
23011 if (mode == V2QImode)
23012 return true;
23013 return false;
23016 /* Target hook for c_mode_for_suffix. */
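/* For example, this hook is what lets x86 accept the literal suffixes in
   "1.0q" (__float128, TFmode) and "1.0w" (__float80, XFmode).  */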
23017 static machine_mode
23018 ix86_c_mode_for_suffix (char suffix)
23020 if (suffix == 'q')
23021 return TFmode;
23022 if (suffix == 'w')
23023 return XFmode;
23025 return VOIDmode;
23028 /* Worker function for TARGET_MD_ASM_ADJUST.
23030 We implement asm flag outputs, and maintain source compatibility
23031 with the old cc0-based compiler. */
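/* For illustration, a user-level view of the flag-output constraints handled
   below, using GCC's documented "=@cc<cond>" syntax:

     int is_zero (unsigned x)
     {
       int z;
       asm ("testl %1, %1" : "=@ccz" (z) : "r" (x));
       return z;    // 1 if ZF was set by the test, 0 otherwise
     }
*/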
23033 static rtx_insn *
23034 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
23035 vec<machine_mode> & /*input_modes*/,
23036 vec<const char *> &constraints, vec<rtx> &clobbers,
23037 HARD_REG_SET &clobbered_regs, location_t loc)
23039 bool saw_asm_flag = false;
23041 start_sequence ();
23042 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
23044 const char *con = constraints[i];
23045 if (!startswith (con, "=@cc"))
23046 continue;
23047 con += 4;
23048 if (strchr (con, ',') != NULL)
23050 error_at (loc, "alternatives not allowed in %<asm%> flag output");
23051 continue;
23054 bool invert = false;
23055 if (con[0] == 'n')
23056 invert = true, con++;
23058 machine_mode mode = CCmode;
23059 rtx_code code = UNKNOWN;
23061 switch (con[0])
23063 case 'a':
23064 if (con[1] == 0)
23065 mode = CCAmode, code = EQ;
23066 else if (con[1] == 'e' && con[2] == 0)
23067 mode = CCCmode, code = NE;
23068 break;
23069 case 'b':
23070 if (con[1] == 0)
23071 mode = CCCmode, code = EQ;
23072 else if (con[1] == 'e' && con[2] == 0)
23073 mode = CCAmode, code = NE;
23074 break;
23075 case 'c':
23076 if (con[1] == 0)
23077 mode = CCCmode, code = EQ;
23078 break;
23079 case 'e':
23080 if (con[1] == 0)
23081 mode = CCZmode, code = EQ;
23082 break;
23083 case 'g':
23084 if (con[1] == 0)
23085 mode = CCGCmode, code = GT;
23086 else if (con[1] == 'e' && con[2] == 0)
23087 mode = CCGCmode, code = GE;
23088 break;
23089 case 'l':
23090 if (con[1] == 0)
23091 mode = CCGCmode, code = LT;
23092 else if (con[1] == 'e' && con[2] == 0)
23093 mode = CCGCmode, code = LE;
23094 break;
23095 case 'o':
23096 if (con[1] == 0)
23097 mode = CCOmode, code = EQ;
23098 break;
23099 case 'p':
23100 if (con[1] == 0)
23101 mode = CCPmode, code = EQ;
23102 break;
23103 case 's':
23104 if (con[1] == 0)
23105 mode = CCSmode, code = EQ;
23106 break;
23107 case 'z':
23108 if (con[1] == 0)
23109 mode = CCZmode, code = EQ;
23110 break;
23112 if (code == UNKNOWN)
23114 error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
23115 continue;
23117 if (invert)
23118 code = reverse_condition (code);
23120 rtx dest = outputs[i];
23121 if (!saw_asm_flag)
23123 /* This is the first asm flag output. Here we put the flags
23124 register in as the real output and adjust the condition to
23125 allow it. */
23126 constraints[i] = "=Bf";
23127 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
23128 saw_asm_flag = true;
23130 else
23132 /* We don't need the flags register as output twice. */
23133 constraints[i] = "=X";
23134 outputs[i] = gen_rtx_SCRATCH (SImode);
23137 rtx x = gen_rtx_REG (mode, FLAGS_REG);
23138 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
23140 machine_mode dest_mode = GET_MODE (dest);
23141 if (!SCALAR_INT_MODE_P (dest_mode))
23143 error_at (loc, "invalid type for %<asm%> flag output");
23144 continue;
23147 if (dest_mode == QImode)
23148 emit_insn (gen_rtx_SET (dest, x));
23149 else
23151 rtx reg = gen_reg_rtx (QImode);
23152 emit_insn (gen_rtx_SET (reg, x));
23154 reg = convert_to_mode (dest_mode, reg, 1);
23155 emit_move_insn (dest, reg);
23159 rtx_insn *seq = get_insns ();
23160 end_sequence ();
23162 if (saw_asm_flag)
23163 return seq;
23164 else
23166 /* If we had no asm flag outputs, clobber the flags. */
23167 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
23168 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
23169 return NULL;
23173 /* Implements the target hook targetm.asm.encode_section_info. */
23175 static void ATTRIBUTE_UNUSED
23176 ix86_encode_section_info (tree decl, rtx rtl, int first)
23178 default_encode_section_info (decl, rtl, first);
23180 if (ix86_in_large_data_p (decl))
23181 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
23184 /* Worker function for REVERSE_CONDITION. */
23186 enum rtx_code
23187 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
23189 return (mode == CCFPmode
23190 ? reverse_condition_maybe_unordered (code)
23191 : reverse_condition (code));
23194 /* Output code to perform an x87 FP register move, from OPERANDS[1]
23195 to OPERANDS[0]. */
23197 const char *
23198 output_387_reg_move (rtx_insn *insn, rtx *operands)
23200 if (REG_P (operands[0]))
23202 if (REG_P (operands[1])
23203 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
23205 if (REGNO (operands[0]) == FIRST_STACK_REG)
23206 return output_387_ffreep (operands, 0);
23207 return "fstp\t%y0";
23209 if (STACK_TOP_P (operands[0]))
23210 return "fld%Z1\t%y1";
23211 return "fst\t%y0";
23213 else if (MEM_P (operands[0]))
23215 gcc_assert (REG_P (operands[1]));
23216 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
23217 return "fstp%Z0\t%y0";
23218 else
23220 /* There is no non-popping store to memory for XFmode.
23221 So if we need one, follow the store with a load. */
23222 if (GET_MODE (operands[0]) == XFmode)
23223 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
23224 else
23225 return "fst%Z0\t%y0";
23228 else
23229 gcc_unreachable();
23231 #ifdef TARGET_SOLARIS
23232 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
23234 static void
23235 i386_solaris_elf_named_section (const char *name, unsigned int flags,
23236 tree decl)
23238 /* With Binutils 2.15, the "@unwind" marker must be specified on
23239 every occurrence of the ".eh_frame" section, not just the first
23240 one. */
23241 if (TARGET_64BIT
23242 && strcmp (name, ".eh_frame") == 0)
23244 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
23245 flags & SECTION_WRITE ? "aw" : "a");
23246 return;
23249 #ifndef USE_GAS
23250 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
23252 solaris_elf_asm_comdat_section (name, flags, decl);
23253 return;
23256 /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
23257 SPARC assembler. One cannot mix single-letter flags and #exclude, so
23258 only emit the latter here. */
23259 if (flags & SECTION_EXCLUDE)
23261 fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
23262 return;
23264 #endif
23266 default_elf_asm_named_section (name, flags, decl);
23268 #endif /* TARGET_SOLARIS */
23270 /* Return the mangling of TYPE if it is an extended fundamental type. */
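/* For illustration, the resulting Itanium-ABI mangled names look like:

     void f (__float128);   ->  _Z1fg
     void f (long double);  ->  _Z1fe
     void f (_Float16);     ->  _Z1fDF16_
     void f (__bf16);       ->  _Z1fDF16b
*/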
23272 static const char *
23273 ix86_mangle_type (const_tree type)
23275 type = TYPE_MAIN_VARIANT (type);
23277 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
23278 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
23279 return NULL;
23281 if (type == float128_type_node || type == float64x_type_node)
23282 return NULL;
23284 switch (TYPE_MODE (type))
23286 case E_BFmode:
23287 return "DF16b";
23288 case E_HFmode:
23289 /* _Float16 is "DF16_".
23290 Align with clang's decision in https://reviews.llvm.org/D33719. */
23291 return "DF16_";
23292 case E_TFmode:
23293 /* __float128 is "g". */
23294 return "g";
23295 case E_XFmode:
23296 /* "long double" or __float80 is "e". */
23297 return "e";
23298 default:
23299 return NULL;
23303 /* Create C++ tinfo symbols for only conditionally available fundamental
23304 types. */
23306 static void
23307 ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
23309 extern tree ix86_float16_type_node;
23310 extern tree ix86_bf16_type_node;
23312 if (!TARGET_SSE2)
23314 if (!float16_type_node)
23315 float16_type_node = ix86_float16_type_node;
23316 if (!bfloat16_type_node)
23317 bfloat16_type_node = ix86_bf16_type_node;
23318 callback (float16_type_node);
23319 callback (bfloat16_type_node);
23320 float16_type_node = NULL_TREE;
23321 bfloat16_type_node = NULL_TREE;
23325 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
23327 static tree
23328 ix86_stack_protect_guard (void)
23330 if (TARGET_SSP_TLS_GUARD)
23332 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
23333 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
23334 tree type = build_qualified_type (type_node, qual);
23335 tree t;
23337 if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
23339 t = ix86_tls_stack_chk_guard_decl;
23341 if (t == NULL)
23343 rtx x;
23345 t = build_decl
23346 (UNKNOWN_LOCATION, VAR_DECL,
23347 get_identifier (ix86_stack_protector_guard_symbol_str),
23348 type);
23349 TREE_STATIC (t) = 1;
23350 TREE_PUBLIC (t) = 1;
23351 DECL_EXTERNAL (t) = 1;
23352 TREE_USED (t) = 1;
23353 TREE_THIS_VOLATILE (t) = 1;
23354 DECL_ARTIFICIAL (t) = 1;
23355 DECL_IGNORED_P (t) = 1;
23357 /* Do not share RTL as the declaration is visible outside of
23358 current function. */
23359 x = DECL_RTL (t);
23360 RTX_FLAG (x, used) = 1;
23362 ix86_tls_stack_chk_guard_decl = t;
23365 else
23367 tree asptrtype = build_pointer_type (type);
23369 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
23370 t = build2 (MEM_REF, asptrtype, t,
23371 build_int_cst (asptrtype, 0));
23372 TREE_THIS_VOLATILE (t) = 1;
23375 return t;
23378 return default_stack_protect_guard ();
23381 /* For 32-bit code we can save PIC register setup by using
23382 __stack_chk_fail_local hidden function instead of calling
23383 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
23384 register, so it is better to call __stack_chk_fail directly. */
23386 static tree ATTRIBUTE_UNUSED
23387 ix86_stack_protect_fail (void)
23389 return TARGET_64BIT
23390 ? default_external_stack_protect_fail ()
23391 : default_hidden_stack_protect_fail ();
23394 /* Select a format to encode pointers in exception handling data. CODE
23395 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
23396 true if the symbol may be affected by dynamic relocations.
23398 ??? All x86 object file formats are capable of representing this.
23399 After all, the relocation needed is the same as for the call insn.
23400 Whether or not a particular assembler allows us to enter such, I
23401 guess we'll have to see. */
23403 int
23404 asm_preferred_eh_data_format (int code, int global)
23406 /* PE-COFF is effectively always -fPIC because of the .reloc section. */
23407 if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access)
23409 int type = DW_EH_PE_sdata8;
23410 if (ptr_mode == SImode
23411 || ix86_cmodel == CM_SMALL_PIC
23412 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
23413 type = DW_EH_PE_sdata4;
23414 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
23417 if (ix86_cmodel == CM_SMALL
23418 || (ix86_cmodel == CM_MEDIUM && code))
23419 return DW_EH_PE_udata4;
23421 return DW_EH_PE_absptr;
23424 /* Implement targetm.vectorize.builtin_vectorization_cost. */
23425 static int
23426 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
23427 tree vectype, int)
23429 bool fp = false;
23430 machine_mode mode = TImode;
23431 int index;
23432 if (vectype != NULL)
23434 fp = FLOAT_TYPE_P (vectype);
23435 mode = TYPE_MODE (vectype);
23438 switch (type_of_cost)
23440 case scalar_stmt:
23441 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
23443 case scalar_load:
23444 /* load/store costs are relative to register move which is 2. Recompute
23445 it to COSTS_N_INSNS so everything has the same base. */
23446 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
23447 : ix86_cost->int_load [2]) / 2;
23449 case scalar_store:
23450 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
23451 : ix86_cost->int_store [2]) / 2;
23453 case vector_stmt:
23454 return ix86_vec_cost (mode,
23455 fp ? ix86_cost->addss : ix86_cost->sse_op);
23457 case vector_load:
23458 index = sse_store_index (mode);
23459 /* See PR82713 - we may end up being called on non-vector type. */
23460 if (index < 0)
23461 index = 2;
23462 return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
23464 case vector_store:
23465 index = sse_store_index (mode);
23466 /* See PR82713 - we may end up being called on non-vector type. */
23467 if (index < 0)
23468 index = 2;
23469 return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
23471 case vec_to_scalar:
23472 case scalar_to_vec:
23473 return ix86_vec_cost (mode, ix86_cost->sse_op);
23475 /* We should have separate costs for unaligned loads and gather/scatter.
23476 Do that incrementally. */
23477 case unaligned_load:
23478 index = sse_store_index (mode);
23479 /* See PR82713 - we may end up being called on non-vector type. */
23480 if (index < 0)
23481 index = 2;
23482 return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
23484 case unaligned_store:
23485 index = sse_store_index (mode);
23486 /* See PR82713 - we may end up being called on non-vector type. */
23487 if (index < 0)
23488 index = 2;
23489 return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
23491 case vector_gather_load:
23492 return ix86_vec_cost (mode,
23493 COSTS_N_INSNS
23494 (ix86_cost->gather_static
23495 + ix86_cost->gather_per_elt
23496 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
23498 case vector_scatter_store:
23499 return ix86_vec_cost (mode,
23500 COSTS_N_INSNS
23501 (ix86_cost->scatter_static
23502 + ix86_cost->scatter_per_elt
23503 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
23505 case cond_branch_taken:
23506 return ix86_cost->cond_taken_branch_cost;
23508 case cond_branch_not_taken:
23509 return ix86_cost->cond_not_taken_branch_cost;
23511 case vec_perm:
23512 case vec_promote_demote:
23513 return ix86_vec_cost (mode, ix86_cost->sse_op);
23515 case vec_construct:
23517 int n = TYPE_VECTOR_SUBPARTS (vectype);
23518 /* N - 1 element inserts into an SSE vector, the possible
23519 GPR -> XMM move is accounted for in add_stmt_cost. */
23520 if (GET_MODE_BITSIZE (mode) <= 128)
23521 return (n - 1) * ix86_cost->sse_op;
23522 /* One vinserti128 for combining two SSE vectors for AVX256. */
23523 else if (GET_MODE_BITSIZE (mode) == 256)
23524 return ((n - 2) * ix86_cost->sse_op
23525 + ix86_vec_cost (mode, ix86_cost->addss));
23526 /* One vinserti64x4 and two vinserti128 for combining SSE
23527 and AVX256 vectors to AVX512. */
23528 else if (GET_MODE_BITSIZE (mode) == 512)
23529 return ((n - 4) * ix86_cost->sse_op
23530 + 3 * ix86_vec_cost (mode, ix86_cost->addss));
23531 gcc_unreachable ();
23534 default:
23535 gcc_unreachable ();
23540 /* This function returns the calling-ABI-specific va_list type node.
23541 It returns the va_list type specific to FNDECL. */
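/* For illustration (user-visible effect, not code from this file): on
   x86-64 a varargs function declared __attribute__((ms_abi)) declares its
   argument pointer as __builtin_ms_va_list and initializes it with
   __builtin_ms_va_start, while ordinary SysV functions keep the standard
   va_list machinery.  */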
23543 static tree
23544 ix86_fn_abi_va_list (tree fndecl)
23546 if (!TARGET_64BIT)
23547 return va_list_type_node;
23548 gcc_assert (fndecl != NULL_TREE);
23550 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
23551 return ms_va_list_type_node;
23552 else
23553 return sysv_va_list_type_node;
23556 /* Returns the canonical va_list type specified by TYPE. If there
23557 is no valid TYPE provided, it returns NULL_TREE. */
23559 static tree
23560 ix86_canonical_va_list_type (tree type)
23562 if (TARGET_64BIT)
23564 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
23565 return ms_va_list_type_node;
23567 if ((TREE_CODE (type) == ARRAY_TYPE
23568 && integer_zerop (array_type_nelts (type)))
23569 || POINTER_TYPE_P (type))
23571 tree elem_type = TREE_TYPE (type);
23572 if (TREE_CODE (elem_type) == RECORD_TYPE
23573 && lookup_attribute ("sysv_abi va_list",
23574 TYPE_ATTRIBUTES (elem_type)))
23575 return sysv_va_list_type_node;
23578 return NULL_TREE;
23581 return std_canonical_va_list_type (type);
23584 /* Iterate through the target-specific builtin types for va_list.
23585 IDX denotes the iterator, *PTREE is set to the result type of
23586 the va_list builtin, and *PNAME to its internal type.
23587 Returns zero if there is no element for this index, otherwise
23588 IDX should be increased upon the next call.
23589 Note, do not iterate a base builtin's name like __builtin_va_list.
23590 Used from c_common_nodes_and_builtins. */
23592 static int
23593 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
23595 if (TARGET_64BIT)
23597 switch (idx)
23599 default:
23600 break;
23602 case 0:
23603 *ptree = ms_va_list_type_node;
23604 *pname = "__builtin_ms_va_list";
23605 return 1;
23607 case 1:
23608 *ptree = sysv_va_list_type_node;
23609 *pname = "__builtin_sysv_va_list";
23610 return 1;
23614 return 0;
23617 #undef TARGET_SCHED_DISPATCH
23618 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
23619 #undef TARGET_SCHED_DISPATCH_DO
23620 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
23621 #undef TARGET_SCHED_REASSOCIATION_WIDTH
23622 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
23623 #undef TARGET_SCHED_REORDER
23624 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
23625 #undef TARGET_SCHED_ADJUST_PRIORITY
23626 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
23627 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
23628 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
23629 ix86_dependencies_evaluation_hook
23632 /* Implementation of reassociation_width target hook used by
23633 reassoc phase to identify parallelism level in reassociated
23634 tree. Statements tree_code is passed in OPC. Arguments type
23635 is passed in MODE. */
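/* For example, a width of 2 allows the reassociation pass to rewrite
   a + b + c + d as (a + b) + (c + d) so that two additions can issue in
   parallel, whereas a width of 1 preserves the serial dependency chain.  */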
23637 static int
23638 ix86_reassociation_width (unsigned int op, machine_mode mode)
23640 int width = 1;
23641 /* Vector part. */
23642 if (VECTOR_MODE_P (mode))
23644 int div = 1;
23645 if (INTEGRAL_MODE_P (mode))
23646 width = ix86_cost->reassoc_vec_int;
23647 else if (FLOAT_MODE_P (mode))
23648 width = ix86_cost->reassoc_vec_fp;
23650 if (width == 1)
23651 return 1;
23653 /* Integer vector instructions execute in FP unit
23654 and can execute 3 additions and one multiplication per cycle. */
23655 if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
23656 || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4)
23657 && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
23658 return 1;
23660 /* Account for targets that split wide vectors into multiple parts. */
23661 if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256)
23662 div = GET_MODE_BITSIZE (mode) / 256;
23663 else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
23664 div = GET_MODE_BITSIZE (mode) / 128;
23665 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
23666 div = GET_MODE_BITSIZE (mode) / 64;
23667 width = (width + div - 1) / div;
23669 /* Scalar part. */
23670 else if (INTEGRAL_MODE_P (mode))
23671 width = ix86_cost->reassoc_int;
23672 else if (FLOAT_MODE_P (mode))
23673 width = ix86_cost->reassoc_fp;
23675 /* Avoid using too many registers in 32bit mode. */
23676 if (!TARGET_64BIT && width > 2)
23677 width = 2;
23678 return width;
23681 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
23682 place emms and femms instructions. */
23684 static machine_mode
23685 ix86_preferred_simd_mode (scalar_mode mode)
23687 if (!TARGET_SSE)
23688 return word_mode;
23690 switch (mode)
23692 case E_QImode:
23693 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
23694 return V64QImode;
23695 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
23696 return V32QImode;
23697 else
23698 return V16QImode;
23700 case E_HImode:
23701 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
23702 return V32HImode;
23703 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
23704 return V16HImode;
23705 else
23706 return V8HImode;
23708 case E_SImode:
23709 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
23710 return V16SImode;
23711 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
23712 return V8SImode;
23713 else
23714 return V4SImode;
23716 case E_DImode:
23717 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
23718 return V8DImode;
23719 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
23720 return V4DImode;
23721 else
23722 return V2DImode;
23724 case E_HFmode:
23725 if (TARGET_AVX512FP16)
23727 if (TARGET_AVX512VL)
23729 if (TARGET_PREFER_AVX128)
23730 return V8HFmode;
23731 else if (TARGET_PREFER_AVX256)
23732 return V16HFmode;
23734 return V32HFmode;
23736 return word_mode;
23738 case E_SFmode:
23739 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
23740 return V16SFmode;
23741 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
23742 return V8SFmode;
23743 else
23744 return V4SFmode;
23746 case E_DFmode:
23747 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
23748 return V8DFmode;
23749 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
23750 return V4DFmode;
23751 else if (TARGET_SSE2)
23752 return V2DFmode;
23753 /* FALLTHRU */
23755 default:
23756 return word_mode;
23760 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
23761 vectors. If AVX512F is enabled then try vectorizing with 512bit,
23762 256bit and 128bit vectors. */
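/* For example, with -mavx512f (and no 256-bit preference) the vectorizer
   is offered 512-, 256- and 128-bit candidate modes in that order, while
   plain -mavx offers the 256- and 128-bit ones.  */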
23764 static unsigned int
23765 ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
23767 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
23769 modes->safe_push (V64QImode);
23770 modes->safe_push (V32QImode);
23771 modes->safe_push (V16QImode);
23773 else if (TARGET_AVX512F && all)
23775 modes->safe_push (V32QImode);
23776 modes->safe_push (V16QImode);
23777 modes->safe_push (V64QImode);
23779 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
23781 modes->safe_push (V32QImode);
23782 modes->safe_push (V16QImode);
23784 else if (TARGET_AVX && all)
23786 modes->safe_push (V16QImode);
23787 modes->safe_push (V32QImode);
23789 else if (TARGET_SSE2)
23790 modes->safe_push (V16QImode);
23792 if (TARGET_MMX_WITH_SSE)
23793 modes->safe_push (V8QImode);
23795 if (TARGET_SSE2)
23796 modes->safe_push (V4QImode);
23798 return 0;
23801 /* Implementation of targetm.vectorize.get_mask_mode. */
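/* Two illustrative cases: a V16SFmode loop under AVX512F gets a scalar
   HImode mask (one bit per lane, i.e. a k-register), whereas a V4SFmode
   loop without AVX512VL falls back to a V4SImode vector mask.  */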
23803 static opt_machine_mode
23804 ix86_get_mask_mode (machine_mode data_mode)
23806 unsigned vector_size = GET_MODE_SIZE (data_mode);
23807 unsigned nunits = GET_MODE_NUNITS (data_mode);
23808 unsigned elem_size = vector_size / nunits;
23810 /* Scalar mask case. */
23811 if ((TARGET_AVX512F && vector_size == 64)
23812 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
23814 if (elem_size == 4
23815 || elem_size == 8
23816 || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
23817 return smallest_int_mode_for_size (nunits);
23820 scalar_int_mode elem_mode
23821 = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
23823 gcc_assert (elem_size * nunits == vector_size);
23825 return mode_for_vector (elem_mode, nunits);
23830 /* Return class of registers which could be used for pseudo of MODE
23831 and of class RCLASS for spilling instead of memory. Return NO_REGS
23832 if it is not possible or non-profitable. */
23834 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
23836 static reg_class_t
23837 ix86_spill_class (reg_class_t rclass, machine_mode mode)
23839 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
23840 && TARGET_SSE2
23841 && TARGET_INTER_UNIT_MOVES_TO_VEC
23842 && TARGET_INTER_UNIT_MOVES_FROM_VEC
23843 && (mode == SImode || (TARGET_64BIT && mode == DImode))
23844 && INTEGER_CLASS_P (rclass))
23845 return ALL_SSE_REGS;
23846 return NO_REGS;
23849 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
23850 but returns a lower bound. */
23852 static unsigned int
23853 ix86_max_noce_ifcvt_seq_cost (edge e)
23855 bool predictable_p = predictable_edge_p (e);
23856 if (predictable_p)
23858 if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
23859 return param_max_rtl_if_conversion_predictable_cost;
23861 else
23863 if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
23864 return param_max_rtl_if_conversion_unpredictable_cost;
23867 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
23870 /* Return true if SEQ is a good candidate as a replacement for the
23871 if-convertible sequence described in IF_INFO. */
23873 static bool
23874 ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
23876 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
23878 int cmov_cnt = 0;
23879 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
23880 Maybe we should allow even more conditional moves as long as they
23881 are used far enough not to stall the CPU, or also consider
23882 IF_INFO->TEST_BB succ edge probabilities. */
23883 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
23885 rtx set = single_set (insn);
23886 if (!set)
23887 continue;
23888 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
23889 continue;
23890 rtx src = SET_SRC (set);
23891 machine_mode mode = GET_MODE (src);
23892 if (GET_MODE_CLASS (mode) != MODE_INT
23893 && GET_MODE_CLASS (mode) != MODE_FLOAT)
23894 continue;
23895 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
23896 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
23897 continue;
23898 /* insn is CMOV or FCMOV. */
23899 if (++cmov_cnt > 1)
23900 return false;
23903 return default_noce_conversion_profitable_p (seq, if_info);
23906 /* x86-specific vector costs. */
23907 class ix86_vector_costs : public vector_costs
23909 using vector_costs::vector_costs;
23911 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
23912 stmt_vec_info stmt_info, slp_tree node,
23913 tree vectype, int misalign,
23914 vect_cost_model_location where) override;
23915 void finish_cost (const vector_costs *) override;
23918 /* Implement targetm.vectorize.create_costs. */
23920 static vector_costs *
23921 ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
23923 return new ix86_vector_costs (vinfo, costing_for_scalar);
23926 unsigned
23927 ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
23928 stmt_vec_info stmt_info, slp_tree node,
23929 tree vectype, int misalign,
23930 vect_cost_model_location where)
23932 unsigned retval = 0;
23933 bool scalar_p
23934 = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
23935 int stmt_cost = -1;
23937 bool fp = false;
23938 machine_mode mode = scalar_p ? SImode : TImode;
23940 if (vectype != NULL)
23942 fp = FLOAT_TYPE_P (vectype);
23943 mode = TYPE_MODE (vectype);
23944 if (scalar_p)
23945 mode = TYPE_MODE (TREE_TYPE (vectype));
23948 if ((kind == vector_stmt || kind == scalar_stmt)
23949 && stmt_info
23950 && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
23952 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
23953 /*machine_mode inner_mode = mode;
23954 if (VECTOR_MODE_P (mode))
23955 inner_mode = GET_MODE_INNER (mode);*/
23957 switch (subcode)
23959 case PLUS_EXPR:
23960 case POINTER_PLUS_EXPR:
23961 case MINUS_EXPR:
23962 if (kind == scalar_stmt)
23964 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
23965 stmt_cost = ix86_cost->addss;
23966 else if (X87_FLOAT_MODE_P (mode))
23967 stmt_cost = ix86_cost->fadd;
23968 else
23969 stmt_cost = ix86_cost->add;
23971 else
23972 stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
23973 : ix86_cost->sse_op);
23974 break;
23976 case MULT_EXPR:
23977 /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
23978 take it as MULT_EXPR. */
23979 case MULT_HIGHPART_EXPR:
23980 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
23981 break;
23982 /* There's no direct instruction for WIDEN_MULT_EXPR,
23983 take emulation into account. */
23984 case WIDEN_MULT_EXPR:
23985 stmt_cost = ix86_widen_mult_cost (ix86_cost, mode,
23986 TYPE_UNSIGNED (vectype));
23987 break;
23989 case NEGATE_EXPR:
23990 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
23991 stmt_cost = ix86_cost->sse_op;
23992 else if (X87_FLOAT_MODE_P (mode))
23993 stmt_cost = ix86_cost->fchs;
23994 else if (VECTOR_MODE_P (mode))
23995 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
23996 else
23997 stmt_cost = ix86_cost->add;
23998 break;
23999 case TRUNC_DIV_EXPR:
24000 case CEIL_DIV_EXPR:
24001 case FLOOR_DIV_EXPR:
24002 case ROUND_DIV_EXPR:
24003 case TRUNC_MOD_EXPR:
24004 case CEIL_MOD_EXPR:
24005 case FLOOR_MOD_EXPR:
24006 case RDIV_EXPR:
24007 case ROUND_MOD_EXPR:
24008 case EXACT_DIV_EXPR:
24009 stmt_cost = ix86_division_cost (ix86_cost, mode);
24010 break;
24012 case RSHIFT_EXPR:
24013 case LSHIFT_EXPR:
24014 case LROTATE_EXPR:
24015 case RROTATE_EXPR:
24017 tree op1 = gimple_assign_rhs1 (stmt_info->stmt);
24018 tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
24019 stmt_cost = ix86_shift_rotate_cost
24020 (ix86_cost,
24021 (subcode == RSHIFT_EXPR
24022 && !TYPE_UNSIGNED (TREE_TYPE (op1)))
24023 ? ASHIFTRT : LSHIFTRT, mode,
24024 TREE_CODE (op2) == INTEGER_CST,
24025 cst_and_fits_in_hwi (op2)
24026 ? int_cst_value (op2) : -1,
24027 false, false, NULL, NULL);
24029 break;
24030 case NOP_EXPR:
24031 /* Only sign-conversions are free. */
24032 if (tree_nop_conversion_p
24033 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
24034 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
24035 stmt_cost = 0;
24036 break;
24038 case BIT_IOR_EXPR:
24039 case ABS_EXPR:
24040 case ABSU_EXPR:
24041 case MIN_EXPR:
24042 case MAX_EXPR:
24043 case BIT_XOR_EXPR:
24044 case BIT_AND_EXPR:
24045 case BIT_NOT_EXPR:
24046 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
24047 stmt_cost = ix86_cost->sse_op;
24048 else if (VECTOR_MODE_P (mode))
24049 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
24050 else
24051 stmt_cost = ix86_cost->add;
24052 break;
24053 default:
24054 break;
24058 combined_fn cfn;
24059 if ((kind == vector_stmt || kind == scalar_stmt)
24060 && stmt_info
24061 && stmt_info->stmt
24062 && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
24063 switch (cfn)
24065 case CFN_FMA:
24066 stmt_cost = ix86_vec_cost (mode,
24067 mode == SFmode ? ix86_cost->fmass
24068 : ix86_cost->fmasd);
24069 break;
24070 case CFN_MULH:
24071 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
24072 break;
24073 default:
24074 break;
24077 /* If we do elementwise loads into a vector then we are bound by
24078 latency and execution resources for the many scalar loads
24079 (AGU and load ports). Try to account for this by scaling the
24080 construction cost by the number of elements involved. */
24081 if ((kind == vec_construct || kind == vec_to_scalar)
24082 && stmt_info
24083 && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
24084 || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
24085 && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
24086 && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)))
24087 != INTEGER_CST))
24088 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER))
24090 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
24091 stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
24093 else if ((kind == vec_construct || kind == scalar_to_vec)
24094 && node
24095 && SLP_TREE_DEF_TYPE (node) == vect_external_def
24096 && INTEGRAL_TYPE_P (TREE_TYPE (vectype)))
24098 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
24099 unsigned i;
24100 tree op;
24101 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
24102 if (TREE_CODE (op) == SSA_NAME)
24103 TREE_VISITED (op) = 0;
24104 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
24106 if (TREE_CODE (op) != SSA_NAME
24107 || TREE_VISITED (op))
24108 continue;
24109 TREE_VISITED (op) = 1;
24110 gimple *def = SSA_NAME_DEF_STMT (op);
24111 tree tem;
24112 if (is_gimple_assign (def)
24113 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
24114 && ((tem = gimple_assign_rhs1 (def)), true)
24115 && TREE_CODE (tem) == SSA_NAME
24116 /* A sign-change expands to nothing. */
24117 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (def)),
24118 TREE_TYPE (tem)))
24119 def = SSA_NAME_DEF_STMT (tem);
24120 /* When the component is loaded from memory we can directly
24121 move it to a vector register, otherwise we have to go
24122 via a GPR or via vpinsr which involves similar cost.
24123 Likewise with a BIT_FIELD_REF extracting from a vector
24124 register we can hope to avoid using a GPR. */
24125 if (!is_gimple_assign (def)
24126 || ((!gimple_assign_load_p (def)
24127 || (!TARGET_SSE4_1
24128 && GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op))) == 1))
24129 && (gimple_assign_rhs_code (def) != BIT_FIELD_REF
24130 || !VECTOR_TYPE_P (TREE_TYPE
24131 (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))))
24132 stmt_cost += ix86_cost->sse_to_integer;
24134 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
24135 if (TREE_CODE (op) == SSA_NAME)
24136 TREE_VISITED (op) = 0;
24138 if (stmt_cost == -1)
24139 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
24141 /* Penalize DFmode vector operations for Bonnell. */
24142 if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
24143 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
24144 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
24146 /* Statements in an inner loop relative to the loop being
24147 vectorized are weighted more heavily. The value here is
24148 arbitrary and could potentially be improved with analysis. */
24149 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
24151 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
24152 for Silvermont as it has an out-of-order integer pipeline and can execute
24153 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
24154 if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT)
24155 || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL))
24156 && stmt_info && stmt_info->stmt)
24158 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
24159 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
24160 retval = (retval * 17) / 10;
24163 m_costs[where] += retval;
24165 return retval;
24168 void
24169 ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
24171 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
24172 if (loop_vinfo && !m_costing_for_scalar)
24174 /* We are currently not asking the vectorizer to compare costs
24175 between different vector mode sizes. When using predication
24176 that will end up always choosing the preferred mode size even
24177 if there's a smaller mode covering all lanes. Test for this
24178 situation and artificially reject the larger mode attempt.
24179 ??? We currently lack masked ops for sub-SSE sized modes,
24180 so we could restrict this rejection to AVX and AVX512 modes
24181 but error on the safe side for now. */
24182 if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
24183 && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
24184 && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
24185 && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())
24186 > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo))))
24187 m_costs[vect_body] = INT_MAX;
24190 vector_costs::finish_cost (scalar_costs);
24193 /* Validate target specific memory model bits in VAL. */
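/* For example, assuming the __ATOMIC_HLE_* macros are available (-mhle),

	__atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   passes the check below, whereas pairing __ATOMIC_HLE_RELEASE with
   __ATOMIC_RELAXED triggers the warning and is demoted to SEQ_CST.  */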
24195 static unsigned HOST_WIDE_INT
24196 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
24198 enum memmodel model = memmodel_from_int (val);
24199 bool strong;
24201 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
24202 |MEMMODEL_MASK)
24203 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
24205 warning (OPT_Winvalid_memory_model,
24206 "unknown architecture specific memory model");
24207 return MEMMODEL_SEQ_CST;
24209 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
24210 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
24212 warning (OPT_Winvalid_memory_model,
24213 "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
24214 "memory model");
24215 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
24217 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
24219 warning (OPT_Winvalid_memory_model,
24220 "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
24221 "memory model");
24222 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
24224 return val;
24227 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
24228 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
24229 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
24230 or number of vecsize_mangle variants that should be emitted. */
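/* As an illustrative example, an exported

	#pragma omp declare simd
	double f (double x);

   gets all four ISA variants 'b', 'c', 'd' and 'e'; with the vecsize
   values chosen below that yields simdlens of 2, 4, 4 and 8, i.e.
   clones such as _ZGVbN2v_f, _ZGVcN4v_f, _ZGVdN4v_f and _ZGVeN8v_f.  */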
24232 static int
24233 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
24234 struct cgraph_simd_clone *clonei,
24235 tree base_type, int num,
24236 bool explicit_p)
24238 int ret = 1;
24240 if (clonei->simdlen
24241 && (clonei->simdlen < 2
24242 || clonei->simdlen > 1024
24243 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
24245 if (explicit_p)
24246 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
24247 "unsupported simdlen %wd", clonei->simdlen.to_constant ());
24248 return 0;
24251 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
24252 if (TREE_CODE (ret_type) != VOID_TYPE)
24253 switch (TYPE_MODE (ret_type))
24255 case E_QImode:
24256 case E_HImode:
24257 case E_SImode:
24258 case E_DImode:
24259 case E_SFmode:
24260 case E_DFmode:
24261 /* case E_SCmode: */
24262 /* case E_DCmode: */
24263 if (!AGGREGATE_TYPE_P (ret_type))
24264 break;
24265 /* FALLTHRU */
24266 default:
24267 if (explicit_p)
24268 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
24269 "unsupported return type %qT for simd", ret_type);
24270 return 0;
24273 tree t;
24274 int i;
24275 tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
24276 bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
24278 for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
24279 t && t != void_list_node; t = TREE_CHAIN (t), i++)
24281 tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
24282 switch (TYPE_MODE (arg_type))
24284 case E_QImode:
24285 case E_HImode:
24286 case E_SImode:
24287 case E_DImode:
24288 case E_SFmode:
24289 case E_DFmode:
24290 /* case E_SCmode: */
24291 /* case E_DCmode: */
24292 if (!AGGREGATE_TYPE_P (arg_type))
24293 break;
24294 /* FALLTHRU */
24295 default:
24296 if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
24297 break;
24298 if (explicit_p)
24299 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
24300 "unsupported argument type %qT for simd", arg_type);
24301 return 0;
24305 if (!TREE_PUBLIC (node->decl) || !explicit_p)
24307 /* If the function isn't exported, we can pick up just one ISA
24308 for the clones. */
24309 if (TARGET_AVX512F)
24310 clonei->vecsize_mangle = 'e';
24311 else if (TARGET_AVX2)
24312 clonei->vecsize_mangle = 'd';
24313 else if (TARGET_AVX)
24314 clonei->vecsize_mangle = 'c';
24315 else
24316 clonei->vecsize_mangle = 'b';
24317 ret = 1;
24319 else
24321 clonei->vecsize_mangle = "bcde"[num];
24322 ret = 4;
24324 clonei->mask_mode = VOIDmode;
24325 switch (clonei->vecsize_mangle)
24327 case 'b':
24328 clonei->vecsize_int = 128;
24329 clonei->vecsize_float = 128;
24330 break;
24331 case 'c':
24332 clonei->vecsize_int = 128;
24333 clonei->vecsize_float = 256;
24334 break;
24335 case 'd':
24336 clonei->vecsize_int = 256;
24337 clonei->vecsize_float = 256;
24338 break;
24339 case 'e':
24340 clonei->vecsize_int = 512;
24341 clonei->vecsize_float = 512;
24342 if (TYPE_MODE (base_type) == QImode)
24343 clonei->mask_mode = DImode;
24344 else
24345 clonei->mask_mode = SImode;
24346 break;
24348 if (clonei->simdlen == 0)
24350 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
24351 clonei->simdlen = clonei->vecsize_int;
24352 else
24353 clonei->simdlen = clonei->vecsize_float;
24354 clonei->simdlen = clonei->simdlen
24355 / GET_MODE_BITSIZE (TYPE_MODE (base_type));
24357 else if (clonei->simdlen > 16)
24359 /* For compatibility with ICC, use the same upper bounds
24360 for simdlen. In particular, for CTYPE below, use the return type,
24361 unless the function returns void, in which case use the characteristic
24362 type. If it is possible for the given SIMDLEN to pass CTYPE values
24363 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
24364 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
24365 emit the corresponding clone. */
24366 tree ctype = ret_type;
24367 if (VOID_TYPE_P (ret_type))
24368 ctype = base_type;
24369 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
24370 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
24371 cnt /= clonei->vecsize_int;
24372 else
24373 cnt /= clonei->vecsize_float;
24374 if (cnt > (TARGET_64BIT ? 16 : 8))
24376 if (explicit_p)
24377 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
24378 "unsupported simdlen %wd",
24379 clonei->simdlen.to_constant ());
24380 return 0;
24383 return ret;
24386 /* If SIMD clone NODE can't be used in a vectorized loop
24387 in current function, return -1, otherwise return a badness of using it
24388 (0 if it is most desirable from vecsize_mangle point of view, 1
24389 slightly less desirable, etc.). */
24391 static int
24392 ix86_simd_clone_usable (struct cgraph_node *node)
24394 switch (node->simdclone->vecsize_mangle)
24396 case 'b':
24397 if (!TARGET_SSE2)
24398 return -1;
24399 if (!TARGET_AVX)
24400 return 0;
24401 return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1;
24402 case 'c':
24403 if (!TARGET_AVX)
24404 return -1;
24405 return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0;
24406 case 'd':
24407 if (!TARGET_AVX2)
24408 return -1;
24409 return TARGET_AVX512F ? 1 : 0;
24410 case 'e':
24411 if (!TARGET_AVX512F)
24412 return -1;
24413 return 0;
24414 default:
24415 gcc_unreachable ();
24419 /* This function adjusts the unroll factor based on
24420 the hardware capabilities. For example, bdver3 has
24421 a loop buffer which makes unrolling of smaller
24422 loops less important. This function decides the
24423 unroll factor using the number of memory references
24424 (a budget of 32 is used) as a heuristic. */
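/* For example, a -mtune=bdver3 loop containing five word-sized memory
   references has its unroll factor capped at 32 / 5 = 6.  */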
24426 static unsigned
24427 ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
24429 basic_block *bbs;
24430 rtx_insn *insn;
24431 unsigned i;
24432 unsigned mem_count = 0;
24434 /* Unroll small loops when the unroll factor is not explicitly
24435 specified. */
24436 if (ix86_unroll_only_small_loops && !loop->unroll)
24438 if (loop->ninsns <= ix86_cost->small_unroll_ninsns)
24439 return MIN (nunroll, ix86_cost->small_unroll_factor);
24440 else
24441 return 1;
24444 if (!TARGET_ADJUST_UNROLL)
24445 return nunroll;
24447 /* Count the number of memory references within the loop body.
24448 This value determines the unrolling factor for bdver3 and bdver4
24449 architectures. */
24450 subrtx_iterator::array_type array;
24451 bbs = get_loop_body (loop);
24452 for (i = 0; i < loop->num_nodes; i++)
24453 FOR_BB_INSNS (bbs[i], insn)
24454 if (NONDEBUG_INSN_P (insn))
24455 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
24456 if (const_rtx x = *iter)
24457 if (MEM_P (x))
24459 machine_mode mode = GET_MODE (x);
24460 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
24461 if (n_words > 4)
24462 mem_count += 2;
24463 else
24464 mem_count += 1;
24466 free (bbs);
24468 if (mem_count && mem_count <= 32)
24469 return MIN (nunroll, 32 / mem_count);
24471 return nunroll;
24475 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
24477 static bool
24478 ix86_float_exceptions_rounding_supported_p (void)
24480 /* For x87 floating point with standard excess precision handling,
24481 there is no adddf3 pattern (since x87 floating point only has
24482 XFmode operations) so the default hook implementation gets this
24483 wrong. */
24484 return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
24487 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
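/* This is what lets an atomic floating-point compound assignment such as

	_Atomic double d;
	d *= 2.0;

   save and restore the floating-point environment around the
   compare-and-exchange loop and raise only the exceptions of the
   iteration whose result is actually stored, as C11 requires.  */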
24489 static void
24490 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
24492 if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
24493 return;
24494 tree exceptions_var = create_tmp_var_raw (integer_type_node);
24495 if (TARGET_80387)
24497 tree fenv_index_type = build_index_type (size_int (6));
24498 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
24499 tree fenv_var = create_tmp_var_raw (fenv_type);
24500 TREE_ADDRESSABLE (fenv_var) = 1;
24501 tree fenv_ptr = build_pointer_type (fenv_type);
24502 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
24503 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
24504 tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
24505 tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
24506 tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
24507 tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
24508 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
24509 tree hold_fnclex = build_call_expr (fnclex, 0);
24510 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
24511 NULL_TREE, NULL_TREE);
24512 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
24513 hold_fnclex);
24514 *clear = build_call_expr (fnclex, 0);
24515 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
24516 tree fnstsw_call = build_call_expr (fnstsw, 0);
24517 tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
24518 fnstsw_call, NULL_TREE, NULL_TREE);
24519 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
24520 tree update_mod = build4 (TARGET_EXPR, integer_type_node,
24521 exceptions_var, exceptions_x87,
24522 NULL_TREE, NULL_TREE);
24523 *update = build2 (COMPOUND_EXPR, integer_type_node,
24524 sw_mod, update_mod);
24525 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
24526 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
24528 if (TARGET_SSE && TARGET_SSE_MATH)
24530 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
24531 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
24532 tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
24533 tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
24534 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
24535 tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
24536 mxcsr_orig_var, stmxcsr_hold_call,
24537 NULL_TREE, NULL_TREE);
24538 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
24539 mxcsr_orig_var,
24540 build_int_cst (unsigned_type_node, 0x1f80));
24541 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
24542 build_int_cst (unsigned_type_node, 0xffffffc0));
24543 tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
24544 mxcsr_mod_var, hold_mod_val,
24545 NULL_TREE, NULL_TREE);
24546 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
24547 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
24548 hold_assign_orig, hold_assign_mod);
24549 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
24550 ldmxcsr_hold_call);
24551 if (*hold)
24552 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
24553 else
24554 *hold = hold_all;
24555 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
24556 if (*clear)
24557 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
24558 ldmxcsr_clear_call);
24559 else
24560 *clear = ldmxcsr_clear_call;
24561 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
24562 tree exceptions_sse = fold_convert (integer_type_node,
24563 stxmcsr_update_call);
24564 if (*update)
24566 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
24567 exceptions_var, exceptions_sse);
24568 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
24569 exceptions_var, exceptions_mod);
24570 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
24571 exceptions_assign);
24573 else
24574 *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
24575 exceptions_sse, NULL_TREE, NULL_TREE);
24576 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
24577 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
24578 ldmxcsr_update_call);
24580 tree atomic_feraiseexcept
24581 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
24582 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
24583 1, exceptions_var);
24584 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
24585 atomic_feraiseexcept_call);
24588 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
24589 /* For i386, a common symbol is local only in non-PIE binaries. For
24590 x86-64, a common symbol is local only in non-PIE binaries or when the
24591 linker supports copy relocations in PIE binaries. */
24593 static bool
24594 ix86_binds_local_p (const_tree exp)
24596 bool direct_extern_access
24597 = (ix86_direct_extern_access
24598 && !(VAR_OR_FUNCTION_DECL_P (exp)
24599 && lookup_attribute ("nodirect_extern_access",
24600 DECL_ATTRIBUTES (exp))));
24601 if (!direct_extern_access)
24602 ix86_has_no_direct_extern_access = true;
24603 return default_binds_local_p_3 (exp, flag_shlib != 0, true,
24604 direct_extern_access,
24605 (direct_extern_access
24606 && (!flag_pic
24607 || (TARGET_64BIT
24608 && HAVE_LD_PIE_COPYRELOC != 0))));
24611 /* If flag_pic or ix86_direct_extern_access is false, then neither
24612 local nor global relocs should be placed in readonly memory. */
24614 static int
24615 ix86_reloc_rw_mask (void)
24617 return (flag_pic || !ix86_direct_extern_access) ? 3 : 0;
24619 #endif
24621 /* If MEM is in the form of [base+offset], extract the two parts
24622 of address and set to BASE and OFFSET, otherwise return false. */
24624 static bool
24625 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
24627 rtx addr;
24629 gcc_assert (MEM_P (mem));
24631 addr = XEXP (mem, 0);
24633 if (GET_CODE (addr) == CONST)
24634 addr = XEXP (addr, 0);
24636 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
24638 *base = addr;
24639 *offset = const0_rtx;
24640 return true;
24643 if (GET_CODE (addr) == PLUS
24644 && (REG_P (XEXP (addr, 0))
24645 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
24646 && CONST_INT_P (XEXP (addr, 1)))
24648 *base = XEXP (addr, 0);
24649 *offset = XEXP (addr, 1);
24650 return true;
24653 return false;
24656 /* Given OPERANDS of consecutive load/store, check if we can merge
24657 them into move multiple. LOAD is true if they are load instructions.
24658 MODE is the mode of memory operands. */
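/* For example, with MODE == DImode two memory operands at 16(%rsp) and
   24(%rsp) satisfy the adjacency requirement below: they share the same
   base and the second offset equals the first plus the 8-byte mode
   size.  */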
24660 bool
24661 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
24662 machine_mode mode)
24664 HOST_WIDE_INT offval_1, offval_2, msize;
24665 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
24667 if (load)
24669 mem_1 = operands[1];
24670 mem_2 = operands[3];
24671 reg_1 = operands[0];
24672 reg_2 = operands[2];
24674 else
24676 mem_1 = operands[0];
24677 mem_2 = operands[2];
24678 reg_1 = operands[1];
24679 reg_2 = operands[3];
24682 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
24684 if (REGNO (reg_1) != REGNO (reg_2))
24685 return false;
24687 /* Check if the addresses are in the form of [base+offset]. */
24688 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
24689 return false;
24690 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
24691 return false;
24693 /* Check if the bases are the same. */
24694 if (!rtx_equal_p (base_1, base_2))
24695 return false;
24697 offval_1 = INTVAL (offset_1);
24698 offval_2 = INTVAL (offset_2);
24699 msize = GET_MODE_SIZE (mode);
24700 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
24701 if (offval_1 + msize != offval_2)
24702 return false;
24704 return true;
24707 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
24709 static bool
24710 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
24711 optimization_type opt_type)
24713 switch (op)
24715 case asin_optab:
24716 case acos_optab:
24717 case log1p_optab:
24718 case exp_optab:
24719 case exp10_optab:
24720 case exp2_optab:
24721 case expm1_optab:
24722 case ldexp_optab:
24723 case scalb_optab:
24724 case round_optab:
24725 case lround_optab:
24726 return opt_type == OPTIMIZE_FOR_SPEED;
24728 case rint_optab:
24729 if (SSE_FLOAT_MODE_P (mode1)
24730 && TARGET_SSE_MATH
24731 && !flag_trapping_math
24732 && !TARGET_SSE4_1
24733 && mode1 != HFmode)
24734 return opt_type == OPTIMIZE_FOR_SPEED;
24735 return true;
24737 case floor_optab:
24738 case ceil_optab:
24739 case btrunc_optab:
24740 if (((SSE_FLOAT_MODE_P (mode1)
24741 && TARGET_SSE_MATH
24742 && TARGET_SSE4_1)
24743 || mode1 == HFmode)
24744 && !flag_trapping_math)
24745 return true;
24746 return opt_type == OPTIMIZE_FOR_SPEED;
24748 case rsqrt_optab:
24749 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
24751 default:
24752 return true;
24756 /* Address space support.
24758 This is not "far pointers" in the 16-bit sense, but an easy way
24759 to use %fs and %gs segment prefixes. Therefore:
24761 (a) All address spaces have the same modes,
24762 (b) All address spaces have the same address forms,
24763 (c) While %fs and %gs are technically subsets of the generic
24764 address space, they are probably not subsets of each other.
24765 (d) Since we have no access to the segment base register values
24766 without resorting to a system call, we cannot convert a
24767 non-default address space to a default address space.
24768 Therefore we do not claim %fs or %gs are subsets of generic.
24770 Therefore we can (mostly) use the default hooks. */
24772 /* All use of segmentation is assumed to make address 0 valid. */
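/* For example, dereferencing a __seg_gs pointer whose value is 0 is a
   legitimate access at the %gs segment base, as in

	int get_gs_zero (void) { return *(__seg_gs int *) 0; }

   so such accesses must not be optimized as null-pointer dereferences.  */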
24774 static bool
24775 ix86_addr_space_zero_address_valid (addr_space_t as)
24777 return as != ADDR_SPACE_GENERIC;
24780 static void
24781 ix86_init_libfuncs (void)
24783 if (TARGET_64BIT)
24785 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
24786 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
24788 else
24790 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
24791 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
24794 #if TARGET_MACHO
24795 darwin_rename_builtins ();
24796 #endif
24799 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
24800 FPU, assume that the fpcw is set to extended precision; when using
24801 only SSE, rounding is correct; when using both SSE and the FPU,
24802 the rounding precision is indeterminate, since either may be chosen
24803 apparently at random. */
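/* Concretely: -m32 -mfpmath=387 yields FLT_EVAL_METHOD == 2 (float and
   double expressions are evaluated in long double), while -mfpmath=sse
   with SSE2 yields FLT_EVAL_METHOD == 0.  */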
24805 static enum flt_eval_method
24806 ix86_get_excess_precision (enum excess_precision_type type)
24808 switch (type)
24810 case EXCESS_PRECISION_TYPE_FAST:
24811 /* The fastest type to promote to will always be the native type,
24812 whether that occurs with implicit excess precision or
24813 otherwise. */
24814 return TARGET_AVX512FP16
24815 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
24816 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
24817 case EXCESS_PRECISION_TYPE_STANDARD:
24818 case EXCESS_PRECISION_TYPE_IMPLICIT:
24819 /* Otherwise, the excess precision we want when we are
24820 in a standards compliant mode, and the implicit precision we
24821 provide would be identical were it not for the unpredictable
24822 cases. */
24823 if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
24824 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
24825 else if (!TARGET_80387)
24826 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
24827 else if (!TARGET_MIX_SSE_I387)
24829 if (!(TARGET_SSE && TARGET_SSE_MATH))
24830 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
24831 else if (TARGET_SSE2)
24832 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
24835 /* If we are in standards compliant mode, but we know we will
24836 calculate in unpredictable precision, return
24837 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
24838 excess precision if the target can't guarantee it will honor
24839 it. */
24840 return (type == EXCESS_PRECISION_TYPE_STANDARD
24841 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
24842 : FLT_EVAL_METHOD_UNPREDICTABLE);
24843 case EXCESS_PRECISION_TYPE_FLOAT16:
24844 if (TARGET_80387
24845 && !(TARGET_SSE_MATH && TARGET_SSE))
24846 error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>");
24847 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
24848 default:
24849 gcc_unreachable ();
24852 return FLT_EVAL_METHOD_UNPREDICTABLE;
24855 /* Return true if _BitInt(N) is supported and fill its details into *INFO. */
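/* For example, with this hook a 64-bit compilation accepts

	_BitInt(137) x;

   and lowers it to three little-endian DImode limbs, while _BitInt(7)
   uses a single QImode limb; the padding bits of the most significant
   limb are unspecified (info->extended is false).  32-bit targets do
   not support _BitInt yet.  */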
24856 bool
24857 ix86_bitint_type_info (int n, struct bitint_info *info)
24859 if (!TARGET_64BIT)
24860 return false;
24861 if (n <= 8)
24862 info->limb_mode = QImode;
24863 else if (n <= 16)
24864 info->limb_mode = HImode;
24865 else if (n <= 32)
24866 info->limb_mode = SImode;
24867 else
24868 info->limb_mode = DImode;
24869 info->big_endian = false;
24870 info->extended = false;
24871 return true;
24874 /* Implement PUSH_ROUNDING. On 386, we have a pushw instruction that
24875 decrements by exactly 2 no matter what the position was; there is no pushb.
24877 But as the CIE data alignment factor on this arch is -4 for 32-bit targets
24878 and -8 for 64-bit targets, we need to make sure all stack pointer adjustments
24879 are a multiple of 4 for 32-bit targets and 8 for 64-bit targets. */
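/* So pushing a 2-byte value rounds the stack adjustment up to the word
   size: ix86_push_rounding (2) returns 4 with -m32 and 8 with -m64.  */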
24881 poly_int64
24882 ix86_push_rounding (poly_int64 bytes)
24884 return ROUND_UP (bytes, UNITS_PER_WORD);
24887 /* Use 8 bits of metadata starting from bit 48 for LAM_U48,
24888 6 bits of metadata starting from bit 57 for LAM_U57. */
24889 #define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48 \
24890 ? 48 \
24891 : (ix86_lam_type == lam_u57 ? 57 : 0))
24892 #define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48 \
24893 ? 8 \
24894 : (ix86_lam_type == lam_u57 ? 6 : 0))
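/* Illustrative layout for lam_u57: the 6-bit tag occupies pointer bits
   57..62 and bit 63 is left alone, so tagging pointer P with tag T
   amounts to P | ((uint64_t) T << 57) and untagging masks those six
   bits back off.  */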
24896 /* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES. */
24897 bool
24898 ix86_memtag_can_tag_addresses ()
24900 return ix86_lam_type != lam_none && TARGET_LP64;
24903 /* Implement TARGET_MEMTAG_TAG_SIZE. */
24904 unsigned char
24905 ix86_memtag_tag_size ()
24907 return IX86_HWASAN_TAG_SIZE;
24910 /* Implement TARGET_MEMTAG_SET_TAG. */
24911 rtx
24912 ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target)
24914 /* default_memtag_insert_random_tag may
24915 generate a tag whose value needs more than 6 bits. */
24916 if (ix86_lam_type == lam_u57)
24918 unsigned HOST_WIDE_INT and_imm
24919 = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
24921 emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm)));
24923 tag = expand_simple_binop (Pmode, ASHIFT, tag,
24924 GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX,
24925 /* unsignedp = */1, OPTAB_WIDEN);
24926 rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target,
24927 /* unsignedp = */1, OPTAB_DIRECT);
24928 return ret;
24931 /* Implement TARGET_MEMTAG_EXTRACT_TAG. */
24932 rtx
24933 ix86_memtag_extract_tag (rtx tagged_pointer, rtx target)
24935 rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer,
24936 GEN_INT (IX86_HWASAN_SHIFT), target,
24937 /* unsignedp = */0,
24938 OPTAB_DIRECT);
24939 rtx ret = gen_reg_rtx (QImode);
24940 /* Mask off bit63 when LAM_U57. */
24941 if (ix86_lam_type == lam_u57)
24943 unsigned HOST_WIDE_INT and_imm
24944 = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
24945 emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag),
24946 gen_int_mode (and_imm, QImode)));
24948 else
24949 emit_move_insn (ret, gen_lowpart (QImode, tag));
24950 return ret;
24953 /* Implement TARGET_MEMTAG_UNTAGGED_POINTER. */
24954 rtx
24955 ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
24957 /* Leave bit63 alone. */
24958 rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT)
24959 + (HOST_WIDE_INT_1U << 63) - 1),
24960 Pmode);
24961 rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer,
24962 tag_mask, target, true,
24963 OPTAB_DIRECT);
24964 gcc_assert (untagged_base);
24965 return untagged_base;
24968 /* Implement TARGET_MEMTAG_ADD_TAG. */
24969 rtx
24970 ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset)
24972 rtx base_tag = gen_reg_rtx (QImode);
24973 rtx base_addr = gen_reg_rtx (Pmode);
24974 rtx tagged_addr = gen_reg_rtx (Pmode);
24975 rtx new_tag = gen_reg_rtx (QImode);
24976 unsigned HOST_WIDE_INT and_imm
24977 = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1;
24979 /* When tag addition "overflows",
24980 we need to mask the most significant bit off. */
24981 emit_move_insn (base_tag, ix86_memtag_extract_tag (base, NULL_RTX));
24982 emit_move_insn (base_addr,
24983 ix86_memtag_untagged_pointer (base, NULL_RTX));
24984 emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode)));
24985 emit_move_insn (new_tag, base_tag);
24986 emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode)));
24987 emit_move_insn (tagged_addr,
24988 ix86_memtag_set_tag (base_addr, new_tag, NULL_RTX));
24989 return plus_constant (Pmode, tagged_addr, offset);
24992 /* Target-specific selftests. */
24994 #if CHECKING_P
24996 namespace selftest {
24998 /* Verify that hard regs are dumped as expected (in compact mode). */
25000 static void
25001 ix86_test_dumping_hard_regs ()
25003 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
25004 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
25007 /* Test dumping an insn with repeated references to the same SCRATCH,
25008 to verify the rtx_reuse code. */
25010 static void
25011 ix86_test_dumping_memory_blockage ()
25013 set_new_first_and_last_insn (NULL, NULL);
25015 rtx pat = gen_memory_blockage ();
25016 rtx_reuse_manager r;
25017 r.preprocess (pat);
25019 /* Verify that the repeated references to the SCRATCH show the use
25020 of reuse IDs. The first should be prefixed with a reuse ID,
25021 and the second should be dumped as a "reuse_rtx" of that ID.
25022 The expected string assumes Pmode == DImode. */
25023 if (Pmode == DImode)
25024 ASSERT_RTL_DUMP_EQ_WITH_REUSE
25025 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
25026 " (unspec:BLK [\n"
25027 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
25028 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
25031 /* Verify loading an RTL dump; specifically a dump of copying
25032 a param on x86_64 from a hard reg into the frame.
25033 This test is target-specific since the dump contains target-specific
25034 hard reg names. */
25036 static void
25037 ix86_test_loading_dump_fragment_1 ()
25039 rtl_dump_test t (SELFTEST_LOCATION,
25040 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
25042 rtx_insn *insn = get_insn_by_uid (1);
25044 /* The block structure and indentation here is purely for
25045 readability; it mirrors the structure of the rtx. */
25046 tree mem_expr;
25048 rtx pat = PATTERN (insn);
25049 ASSERT_EQ (SET, GET_CODE (pat));
25051 rtx dest = SET_DEST (pat);
25052 ASSERT_EQ (MEM, GET_CODE (dest));
25053 /* Verify the "/c" was parsed. */
25054 ASSERT_TRUE (RTX_FLAG (dest, call));
25055 ASSERT_EQ (SImode, GET_MODE (dest));
25057 rtx addr = XEXP (dest, 0);
25058 ASSERT_EQ (PLUS, GET_CODE (addr));
25059 ASSERT_EQ (DImode, GET_MODE (addr));
25061 rtx lhs = XEXP (addr, 0);
25062 /* Verify that the "frame" REG was consolidated. */
25063 ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
25066 rtx rhs = XEXP (addr, 1);
25067 ASSERT_EQ (CONST_INT, GET_CODE (rhs));
25068 ASSERT_EQ (-4, INTVAL (rhs));
25071 /* Verify the "[1 i+0 S4 A32]" was parsed. */
25072 ASSERT_EQ (1, MEM_ALIAS_SET (dest));
25073 /* "i" should have been handled by synthesizing a global int
25074 variable named "i". */
25075 mem_expr = MEM_EXPR (dest);
25076 ASSERT_NE (mem_expr, NULL);
25077 ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
25078 ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
25079 ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
25080 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
25081 /* "+0". */
25082 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
25083 ASSERT_EQ (0, MEM_OFFSET (dest));
25084 /* "S4". */
25085 ASSERT_EQ (4, MEM_SIZE (dest));
25086 /* "A32. */
25087 ASSERT_EQ (32, MEM_ALIGN (dest));
25090 rtx src = SET_SRC (pat);
25091 ASSERT_EQ (REG, GET_CODE (src));
25092 ASSERT_EQ (SImode, GET_MODE (src));
25093 ASSERT_EQ (5, REGNO (src));
25094 tree reg_expr = REG_EXPR (src);
25095 /* "i" here should point to the same var as for the MEM_EXPR. */
25096 ASSERT_EQ (reg_expr, mem_expr);
25101 /* Verify that the RTL loader copes with a call_insn dump.
25102 This test is target-specific since the dump contains a target-specific
25103 hard reg name. */
25105 static void
25106 ix86_test_loading_call_insn ()
25108 /* The test dump includes register "xmm0", which requires TARGET_SSE
25109 to exist. */
25110 if (!TARGET_SSE)
25111 return;
25113 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
25115 rtx_insn *insn = get_insns ();
25116 ASSERT_EQ (CALL_INSN, GET_CODE (insn));
25118 /* "/j". */
25119 ASSERT_TRUE (RTX_FLAG (insn, jump));
25121 rtx pat = PATTERN (insn);
25122 ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
25124 /* Verify REG_NOTES. */
25126 /* "(expr_list:REG_CALL_DECL". */
25127 ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
25128 rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
25129 ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
25131 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
25132 rtx_expr_list *note1 = note0->next ();
25133 ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
25135 ASSERT_EQ (NULL, note1->next ());
25138 /* Verify CALL_INSN_FUNCTION_USAGE. */
25140 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
25141 rtx_expr_list *usage
25142 = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
25143 ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
25144 ASSERT_EQ (DFmode, GET_MODE (usage));
25145 ASSERT_EQ (USE, GET_CODE (usage->element ()));
25146 ASSERT_EQ (NULL, usage->next ());
25150 /* Verify that the RTL loader copes a dump from print_rtx_function.
25151 This test is target-specific since the dump contains target-specific
25152 hard reg names. */
25154 static void
25155 ix86_test_loading_full_dump ()
25157 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
25159 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
25161 rtx_insn *insn_1 = get_insn_by_uid (1);
25162 ASSERT_EQ (NOTE, GET_CODE (insn_1));
25164 rtx_insn *insn_7 = get_insn_by_uid (7);
25165 ASSERT_EQ (INSN, GET_CODE (insn_7));
25166 ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
25168 rtx_insn *insn_15 = get_insn_by_uid (15);
25169 ASSERT_EQ (INSN, GET_CODE (insn_15));
25170 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
25172 /* Verify crtl->return_rtx. */
25173 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
25174 ASSERT_EQ (0, REGNO (crtl->return_rtx));
25175 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
25178 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
25179 In particular, verify that it correctly loads the 2nd operand.
25180 This test is target-specific since these are machine-specific
25181 operands (and enums). */
25183 static void
25184 ix86_test_loading_unspec ()
25186 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
25188 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
25190 ASSERT_TRUE (cfun);
25192 /* Test of an UNSPEC. */
25193 rtx_insn *insn = get_insns ();
25194 ASSERT_EQ (INSN, GET_CODE (insn));
25195 rtx set = single_set (insn);
25196 ASSERT_NE (NULL, set);
25197 rtx dst = SET_DEST (set);
25198 ASSERT_EQ (MEM, GET_CODE (dst));
25199 rtx src = SET_SRC (set);
25200 ASSERT_EQ (UNSPEC, GET_CODE (src));
25201 ASSERT_EQ (BLKmode, GET_MODE (src));
25202 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
25204 rtx v0 = XVECEXP (src, 0, 0);
25206 /* Verify that the two uses of the first SCRATCH have pointer
25207 equality. */
25208 rtx scratch_a = XEXP (dst, 0);
25209 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
25211 rtx scratch_b = XEXP (v0, 0);
25212 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
25214 ASSERT_EQ (scratch_a, scratch_b);
25216 /* Verify that the two mems are thus treated as equal. */
25217 ASSERT_TRUE (rtx_equal_p (dst, v0));
25219 /* Verify that the insn is recognized. */
25220 ASSERT_NE(-1, recog_memoized (insn));
25222 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
25223 insn = NEXT_INSN (insn);
25224 ASSERT_EQ (INSN, GET_CODE (insn));
25226 set = single_set (insn);
25227 ASSERT_NE (NULL, set);
25229 src = SET_SRC (set);
25230 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
25231 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
25234 /* Run all target-specific selftests. */
25236 static void
25237 ix86_run_selftests (void)
25239 ix86_test_dumping_hard_regs ();
25240 ix86_test_dumping_memory_blockage ();
25242 /* Various tests of loading RTL dumps, here because they contain
25243 ix86-isms (e.g. names of hard regs). */
25244 ix86_test_loading_dump_fragment_1 ();
25245 ix86_test_loading_call_insn ();
25246 ix86_test_loading_full_dump ();
25247 ix86_test_loading_unspec ();
25250 } // namespace selftest
25252 #endif /* CHECKING_P */
25254 /* Initialize the GCC target structure. */
25255 #undef TARGET_RETURN_IN_MEMORY
25256 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
25258 #undef TARGET_LEGITIMIZE_ADDRESS
25259 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
25261 #undef TARGET_ATTRIBUTE_TABLE
25262 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25263 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
25264 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
25265 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25266 # undef TARGET_MERGE_DECL_ATTRIBUTES
25267 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25268 #endif
25270 #undef TARGET_INVALID_CONVERSION
25271 #define TARGET_INVALID_CONVERSION ix86_invalid_conversion
25273 #undef TARGET_INVALID_UNARY_OP
25274 #define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
25276 #undef TARGET_INVALID_BINARY_OP
25277 #define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
25279 #undef TARGET_COMP_TYPE_ATTRIBUTES
25280 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25282 #undef TARGET_INIT_BUILTINS
25283 #define TARGET_INIT_BUILTINS ix86_init_builtins
25284 #undef TARGET_BUILTIN_DECL
25285 #define TARGET_BUILTIN_DECL ix86_builtin_decl
25286 #undef TARGET_EXPAND_BUILTIN
25287 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25289 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25290 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25291 ix86_builtin_vectorized_function
25293 #undef TARGET_VECTORIZE_BUILTIN_GATHER
25294 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
25296 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
25297 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
25299 #undef TARGET_BUILTIN_RECIPROCAL
25300 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25302 #undef TARGET_ASM_FUNCTION_EPILOGUE
25303 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25305 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
25306 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
25307 ix86_print_patchable_function_entry
25309 #undef TARGET_ENCODE_SECTION_INFO
25310 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25311 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25312 #else
25313 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25314 #endif
25316 #undef TARGET_ASM_OPEN_PAREN
25317 #define TARGET_ASM_OPEN_PAREN ""
25318 #undef TARGET_ASM_CLOSE_PAREN
25319 #define TARGET_ASM_CLOSE_PAREN ""
25321 #undef TARGET_ASM_BYTE_OP
25322 #define TARGET_ASM_BYTE_OP ASM_BYTE
25324 #undef TARGET_ASM_ALIGNED_HI_OP
25325 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25326 #undef TARGET_ASM_ALIGNED_SI_OP
25327 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25328 #ifdef ASM_QUAD
25329 #undef TARGET_ASM_ALIGNED_DI_OP
25330 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25331 #endif
25333 #undef TARGET_PROFILE_BEFORE_PROLOGUE
25334 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
25336 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
25337 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
25339 #undef TARGET_ASM_UNALIGNED_HI_OP
25340 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25341 #undef TARGET_ASM_UNALIGNED_SI_OP
25342 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25343 #undef TARGET_ASM_UNALIGNED_DI_OP
25344 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25346 #undef TARGET_PRINT_OPERAND
25347 #define TARGET_PRINT_OPERAND ix86_print_operand
25348 #undef TARGET_PRINT_OPERAND_ADDRESS
25349 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
25350 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
25351 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
25352 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
25353 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
25355 #undef TARGET_SCHED_INIT_GLOBAL
25356 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
25357 #undef TARGET_SCHED_ADJUST_COST
25358 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25359 #undef TARGET_SCHED_ISSUE_RATE
25360 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25361 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25362 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25363 ia32_multipass_dfa_lookahead
25364 #undef TARGET_SCHED_MACRO_FUSION_P
25365 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
25366 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
25367 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
25369 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25370 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
25372 #undef TARGET_MEMMODEL_CHECK
25373 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
25375 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
25376 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
25378 #ifdef HAVE_AS_TLS
25379 #undef TARGET_HAVE_TLS
25380 #define TARGET_HAVE_TLS true
25381 #endif
25382 #undef TARGET_CANNOT_FORCE_CONST_MEM
25383 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
25384 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
25385 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
25387 #undef TARGET_DELEGITIMIZE_ADDRESS
25388 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
25390 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
25391 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
25393 #undef TARGET_MS_BITFIELD_LAYOUT_P
25394 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
25396 #if TARGET_MACHO
25397 #undef TARGET_BINDS_LOCAL_P
25398 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
25399 #else
25400 #undef TARGET_BINDS_LOCAL_P
25401 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
25402 #endif
25403 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25404 #undef TARGET_BINDS_LOCAL_P
25405 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
25406 #endif
25408 #undef TARGET_ASM_OUTPUT_MI_THUNK
25409 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
25410 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
25411 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
25413 #undef TARGET_ASM_FILE_START
25414 #define TARGET_ASM_FILE_START x86_file_start
25416 #undef TARGET_OPTION_OVERRIDE
25417 #define TARGET_OPTION_OVERRIDE ix86_option_override
25419 #undef TARGET_REGISTER_MOVE_COST
25420 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
25421 #undef TARGET_MEMORY_MOVE_COST
25422 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
25423 #undef TARGET_RTX_COSTS
25424 #define TARGET_RTX_COSTS ix86_rtx_costs
25425 #undef TARGET_ADDRESS_COST
25426 #define TARGET_ADDRESS_COST ix86_address_cost
25428 #undef TARGET_OVERLAP_OP_BY_PIECES_P
25429 #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
25431 #undef TARGET_FLAGS_REGNUM
25432 #define TARGET_FLAGS_REGNUM FLAGS_REG
25433 #undef TARGET_FIXED_CONDITION_CODE_REGS
25434 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
25435 #undef TARGET_CC_MODES_COMPATIBLE
25436 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
25438 #undef TARGET_MACHINE_DEPENDENT_REORG
25439 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
25441 #undef TARGET_BUILD_BUILTIN_VA_LIST
25442 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
25444 #undef TARGET_FOLD_BUILTIN
25445 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
25447 #undef TARGET_GIMPLE_FOLD_BUILTIN
25448 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
25450 #undef TARGET_COMPARE_VERSION_PRIORITY
25451 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
25453 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
25454 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
25455 ix86_generate_version_dispatcher_body
25457 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
25458 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
25459 ix86_get_function_versions_dispatcher
25461 #undef TARGET_ENUM_VA_LIST_P
25462 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
25464 #undef TARGET_FN_ABI_VA_LIST
25465 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
25467 #undef TARGET_CANONICAL_VA_LIST_TYPE
25468 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
25470 #undef TARGET_EXPAND_BUILTIN_VA_START
25471 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
25473 #undef TARGET_MD_ASM_ADJUST
25474 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
25476 #undef TARGET_C_EXCESS_PRECISION
25477 #define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
25478 #undef TARGET_C_BITINT_TYPE_INFO
25479 #define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info
25480 #undef TARGET_PROMOTE_PROTOTYPES
25481 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
25482 #undef TARGET_PUSH_ARGUMENT
25483 #define TARGET_PUSH_ARGUMENT ix86_push_argument
25484 #undef TARGET_SETUP_INCOMING_VARARGS
25485 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
25486 #undef TARGET_MUST_PASS_IN_STACK
25487 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
25488 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
25489 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
25490 #undef TARGET_FUNCTION_ARG_ADVANCE
25491 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
25492 #undef TARGET_FUNCTION_ARG
25493 #define TARGET_FUNCTION_ARG ix86_function_arg
25494 #undef TARGET_INIT_PIC_REG
25495 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
25496 #undef TARGET_USE_PSEUDO_PIC_REG
25497 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
25498 #undef TARGET_FUNCTION_ARG_BOUNDARY
25499 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
25500 #undef TARGET_PASS_BY_REFERENCE
25501 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
25502 #undef TARGET_INTERNAL_ARG_POINTER
25503 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
25504 #undef TARGET_UPDATE_STACK_BOUNDARY
25505 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
25506 #undef TARGET_GET_DRAP_RTX
25507 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
25508 #undef TARGET_STRICT_ARGUMENT_NAMING
25509 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
25510 #undef TARGET_STATIC_CHAIN
25511 #define TARGET_STATIC_CHAIN ix86_static_chain
25512 #undef TARGET_TRAMPOLINE_INIT
25513 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
25514 #undef TARGET_RETURN_POPS_ARGS
25515 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
25517 #undef TARGET_WARN_FUNC_RETURN
25518 #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
25520 #undef TARGET_LEGITIMATE_COMBINED_INSN
25521 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
25523 #undef TARGET_ASAN_SHADOW_OFFSET
25524 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
25526 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
25527 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
25529 #undef TARGET_SCALAR_MODE_SUPPORTED_P
25530 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
25532 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
25533 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
25534 ix86_libgcc_floating_mode_supported_p
25536 #undef TARGET_VECTOR_MODE_SUPPORTED_P
25537 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
25539 #undef TARGET_C_MODE_FOR_SUFFIX
25540 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
25542 #ifdef HAVE_AS_TLS
25543 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
25544 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
25545 #endif
25547 #ifdef SUBTARGET_INSERT_ATTRIBUTES
25548 #undef TARGET_INSERT_ATTRIBUTES
25549 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
25550 #endif
25552 #undef TARGET_MANGLE_TYPE
25553 #define TARGET_MANGLE_TYPE ix86_mangle_type
25555 #undef TARGET_EMIT_SUPPORT_TINFOS
25556 #define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos
25558 #undef TARGET_STACK_PROTECT_GUARD
25559 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
25561 #if !TARGET_MACHO
25562 #undef TARGET_STACK_PROTECT_FAIL
25563 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
25564 #endif
25566 #undef TARGET_FUNCTION_VALUE
25567 #define TARGET_FUNCTION_VALUE ix86_function_value
25569 #undef TARGET_FUNCTION_VALUE_REGNO_P
25570 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
25572 #undef TARGET_ZERO_CALL_USED_REGS
25573 #define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
25575 #undef TARGET_PROMOTE_FUNCTION_MODE
25576 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
25578 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
25579 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
25581 #undef TARGET_MEMBER_TYPE_FORCES_BLK
25582 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
25584 #undef TARGET_INSTANTIATE_DECLS
25585 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
25587 #undef TARGET_SECONDARY_RELOAD
25588 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
25589 #undef TARGET_SECONDARY_MEMORY_NEEDED
25590 #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
25591 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
25592 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
25594 #undef TARGET_CLASS_MAX_NREGS
25595 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
25597 #undef TARGET_PREFERRED_RELOAD_CLASS
25598 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
25599 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
25600 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
25601 #undef TARGET_CLASS_LIKELY_SPILLED_P
25602 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
25604 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
25605 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
25606 ix86_builtin_vectorization_cost
25607 #undef TARGET_VECTORIZE_VEC_PERM_CONST
25608 #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
25609 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
25610 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
25611 ix86_preferred_simd_mode
25612 #undef TARGET_VECTORIZE_SPLIT_REDUCTION
25613 #define TARGET_VECTORIZE_SPLIT_REDUCTION \
25614 ix86_split_reduction
25615 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
25616 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
25617 ix86_autovectorize_vector_modes
25618 #undef TARGET_VECTORIZE_GET_MASK_MODE
25619 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
25620 #undef TARGET_VECTORIZE_CREATE_COSTS
25621 #define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs
25623 #undef TARGET_SET_CURRENT_FUNCTION
25624 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
25626 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
25627 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
25629 #undef TARGET_OPTION_SAVE
25630 #define TARGET_OPTION_SAVE ix86_function_specific_save
25632 #undef TARGET_OPTION_RESTORE
25633 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
25635 #undef TARGET_OPTION_POST_STREAM_IN
25636 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
25638 #undef TARGET_OPTION_PRINT
25639 #define TARGET_OPTION_PRINT ix86_function_specific_print
25641 #undef TARGET_OPTION_FUNCTION_VERSIONS
25642 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
25644 #undef TARGET_CAN_INLINE_P
25645 #define TARGET_CAN_INLINE_P ix86_can_inline_p
25647 #undef TARGET_LEGITIMATE_ADDRESS_P
25648 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
25650 #undef TARGET_REGISTER_PRIORITY
25651 #define TARGET_REGISTER_PRIORITY ix86_register_priority
25653 #undef TARGET_REGISTER_USAGE_LEVELING_P
25654 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
25656 #undef TARGET_LEGITIMATE_CONSTANT_P
25657 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
25659 #undef TARGET_COMPUTE_FRAME_LAYOUT
25660 #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
25662 #undef TARGET_FRAME_POINTER_REQUIRED
25663 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
25665 #undef TARGET_CAN_ELIMINATE
25666 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
25668 #undef TARGET_EXTRA_LIVE_ON_ENTRY
25669 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
25671 #undef TARGET_ASM_CODE_END
25672 #define TARGET_ASM_CODE_END ix86_code_end
25674 #undef TARGET_CONDITIONAL_REGISTER_USAGE
25675 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
25677 #undef TARGET_CANONICALIZE_COMPARISON
25678 #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
25680 #undef TARGET_LOOP_UNROLL_ADJUST
25681 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
25683 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
25684 #undef TARGET_SPILL_CLASS
25685 #define TARGET_SPILL_CLASS ix86_spill_class
25687 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
25688 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
25689 ix86_simd_clone_compute_vecsize_and_simdlen
25691 #undef TARGET_SIMD_CLONE_ADJUST
25692 #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
25694 #undef TARGET_SIMD_CLONE_USABLE
25695 #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
25697 #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
25698 #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
25700 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
25701 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
25702 ix86_float_exceptions_rounding_supported_p
25704 #undef TARGET_MODE_EMIT
25705 #define TARGET_MODE_EMIT ix86_emit_mode_set
25707 #undef TARGET_MODE_NEEDED
25708 #define TARGET_MODE_NEEDED ix86_mode_needed
25710 #undef TARGET_MODE_AFTER
25711 #define TARGET_MODE_AFTER ix86_mode_after
25713 #undef TARGET_MODE_ENTRY
25714 #define TARGET_MODE_ENTRY ix86_mode_entry
25716 #undef TARGET_MODE_EXIT
25717 #define TARGET_MODE_EXIT ix86_mode_exit
25719 #undef TARGET_MODE_PRIORITY
25720 #define TARGET_MODE_PRIORITY ix86_mode_priority
25722 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
25723 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
25725 #undef TARGET_OFFLOAD_OPTIONS
25726 #define TARGET_OFFLOAD_OPTIONS \
25727 ix86_offload_options
25729 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
25730 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
25732 #undef TARGET_OPTAB_SUPPORTED_P
25733 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
25735 #undef TARGET_HARD_REGNO_SCRATCH_OK
25736 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
25738 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
25739 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
25741 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
25742 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
25744 #undef TARGET_INIT_LIBFUNCS
25745 #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
25747 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
25748 #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
25750 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
25751 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
25753 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
25754 #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
25756 #undef TARGET_HARD_REGNO_NREGS
25757 #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
25758 #undef TARGET_HARD_REGNO_MODE_OK
25759 #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
25761 #undef TARGET_MODES_TIEABLE_P
25762 #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
25764 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
25765 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
25766 ix86_hard_regno_call_part_clobbered
25768 #undef TARGET_INSN_CALLEE_ABI
25769 #define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi
25771 #undef TARGET_CAN_CHANGE_MODE_CLASS
25772 #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
25774 #undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
25775 #define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment
25777 #undef TARGET_STATIC_RTX_ALIGNMENT
25778 #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
25779 #undef TARGET_CONSTANT_ALIGNMENT
25780 #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
25782 #undef TARGET_EMPTY_RECORD_P
25783 #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
25785 #undef TARGET_WARN_PARAMETER_PASSING_ABI
25786 #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
25788 #undef TARGET_GET_MULTILIB_ABI_NAME
25789 #define TARGET_GET_MULTILIB_ABI_NAME \
25790 ix86_get_multilib_abi_name
25792 #undef TARGET_IFUNC_REF_LOCAL_OK
25793 #define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok
25795 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
25796 # undef TARGET_ASM_RELOC_RW_MASK
25797 # define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
25798 #endif
25800 #undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
25801 #define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses
25803 #undef TARGET_MEMTAG_ADD_TAG
25804 #define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag
25806 #undef TARGET_MEMTAG_SET_TAG
25807 #define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag
25809 #undef TARGET_MEMTAG_EXTRACT_TAG
25810 #define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag
25812 #undef TARGET_MEMTAG_UNTAGGED_POINTER
25813 #define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer
25815 #undef TARGET_MEMTAG_TAG_SIZE
25816 #define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size
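25817 /* Implement the TARGET_LIBC_HAS_FAST_FUNCTION hook.  Only glibc is
          known to provide a fast implementation, and only for mempcpy.  */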
25818 static bool
25819 ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
25820 {
25821 #ifdef OPTION_GLIBC
25822   if (OPTION_GLIBC)
25823     return (built_in_function) fcode == BUILT_IN_MEMPCPY;
25824   else
25825     return false;
25826 #else
25827   return false;
25828 #endif
25829 }
25831 #undef TARGET_LIBC_HAS_FAST_FUNCTION
25832 #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
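25833 /* Implement the TARGET_LIBM_FUNCTION_MAX_ERROR hook.  Return the
          maximum error, in ulps, of libm function CFN in MODE, taking the
          glibc/libmvec bounds into account when glibc is the C library.  */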
25834 static unsigned
25835 ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
25836                               bool boundary_p)
25837 {
25838 #ifdef OPTION_GLIBC
25839   bool glibc_p = OPTION_GLIBC;
25840 #else
25841   bool glibc_p = false;
25842 #endif
25843   if (glibc_p)
25844     {
25845       /* If __FAST_MATH__ is defined, glibc provides libmvec.  */
25846       unsigned int libmvec_ret = 0;
25847       if (!flag_trapping_math
25848           && flag_unsafe_math_optimizations
25849           && flag_finite_math_only
25850           && !flag_signed_zeros
25851           && !flag_errno_math)
25852         switch (cfn)
25853           {
25854           CASE_CFN_COS:
25855           CASE_CFN_COS_FN:
25856           CASE_CFN_SIN:
25857           CASE_CFN_SIN_FN:
25858             if (!boundary_p)
25859               {
25860                 /* With non-default rounding modes, libmvec provides
25861                    complete garbage in results.  E.g.
25862                    _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
25863                    returns 0.00333309174f rather than 1.40129846e-45f.  */
25864                 if (flag_rounding_math)
25865                   return ~0U;
25866                 /* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
25867                    claims libmvec maximum error is 4ulps.
25868                    My own random testing indicates 2ulps for SFmode and
25869                    0.5ulps for DFmode, but let's go with the 4ulps.  */
25870                 libmvec_ret = 4;
25871               }
25872             break;
25873           default:
25874             break;
25875           }
25876       unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode,
25877                                                                boundary_p);
25878       return MAX (ret, libmvec_ret);
25879     }
25880   return default_libm_function_max_error (cfn, mode, boundary_p);
25881 }
25883 #undef TARGET_LIBM_FUNCTION_MAX_ERROR
25884 #define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error
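25885 /* Register the i386-specific selftests, run only when GCC itself is
          built with checking enabled.  */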
25886 #if CHECKING_P
25887 #undef TARGET_RUN_TARGET_SELFTESTS
25888 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
25889 #endif /* #if CHECKING_P */
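25890 /* Build the target hook vector from the TARGET_* overrides above; hooks
          not overridden here keep their defaults from target-def.h.  */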
25891 struct gcc_target targetm = TARGET_INITIALIZER;
25893 #include "gt-i386.h"