1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2020 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #define IN_TARGET_CODE 1
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "memmodel.h"
29 #include "gimple.h"
30 #include "cfghooks.h"
31 #include "cfgloop.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic.h"
42 #include "cfgbuild.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "attribs.h"
46 #include "calls.h"
47 #include "stor-layout.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "except.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "cfgrtl.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "gimplify.h"
60 #include "dwarf2.h"
61 #include "tm-constrs.h"
62 #include "cselib.h"
63 #include "sched-int.h"
64 #include "opts.h"
65 #include "tree-pass.h"
66 #include "context.h"
67 #include "pass_manager.h"
68 #include "target-globals.h"
69 #include "gimple-iterator.h"
70 #include "tree-vectorizer.h"
71 #include "shrink-wrap.h"
72 #include "builtins.h"
73 #include "rtl-iter.h"
74 #include "tree-iterator.h"
75 #include "dbgcnt.h"
76 #include "case-cfn-macros.h"
77 #include "dojump.h"
78 #include "fold-const-call.h"
79 #include "tree-vrp.h"
80 #include "tree-ssanames.h"
81 #include "selftest.h"
82 #include "selftest-rtl.h"
83 #include "print-rtl.h"
84 #include "intl.h"
85 #include "ifcvt.h"
86 #include "symbol-summary.h"
87 #include "ipa-prop.h"
88 #include "ipa-fnsummary.h"
89 #include "wide-int-bitmask.h"
90 #include "tree-vector-builder.h"
91 #include "debug.h"
92 #include "dwarf2out.h"
93 #include "i386-options.h"
94 #include "i386-builtins.h"
95 #include "i386-expand.h"
96 #include "i386-features.h"
97 #include "function-abi.h"
99 /* This file should be included last. */
100 #include "target-def.h"
102 static rtx legitimize_dllimport_symbol (rtx, bool);
103 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
104 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
105 static void ix86_emit_restore_reg_using_pop (rtx);
108 #ifndef CHECK_STACK_LIMIT
109 #define CHECK_STACK_LIMIT (-1)
110 #endif
112 /* Return index of given mode in mult and division cost tables. */
113 #define MODE_INDEX(mode) \
114 ((mode) == QImode ? 0 \
115 : (mode) == HImode ? 1 \
116 : (mode) == SImode ? 2 \
117 : (mode) == DImode ? 3 \
118 : 4)
121 /* Set by -mtune. */
122 const struct processor_costs *ix86_tune_cost = NULL;
124 /* Set by -mtune or -Os. */
125 const struct processor_costs *ix86_cost = NULL;
127 /* In case the average insn count for single function invocation is
128 lower than this constant, emit fast (but longer) prologue and
129 epilogue code. */
130 #define FAST_PROLOGUE_INSN_COUNT 20
 132 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
133 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
134 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
135 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
137 /* Array of the smallest class containing reg number REGNO, indexed by
138 REGNO. Used by REGNO_REG_CLASS in i386.h. */
140 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
142 /* ax, dx, cx, bx */
143 AREG, DREG, CREG, BREG,
144 /* si, di, bp, sp */
145 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
146 /* FP registers */
147 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
148 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
149 /* arg pointer, flags, fpsr, frame */
150 NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
151 /* SSE registers */
152 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
153 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
154 /* MMX registers */
155 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
156 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
157 /* REX registers */
158 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
159 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
160 /* SSE REX registers */
161 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
162 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
163 /* AVX-512 SSE registers */
164 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
165 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
166 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
167 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
168 /* Mask registers. */
169 ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
170 MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS
173 /* The "default" register map used in 32bit mode. */
175 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
177 /* general regs */
178 0, 2, 1, 3, 6, 7, 4, 5,
179 /* fp regs */
180 12, 13, 14, 15, 16, 17, 18, 19,
181 /* arg, flags, fpsr, frame */
182 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
183 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
184 /* SSE */
185 21, 22, 23, 24, 25, 26, 27, 28,
186 /* MMX */
187 29, 30, 31, 32, 33, 34, 35, 36,
188 /* extended integer registers */
189 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
190 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
191 /* extended sse registers */
192 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
193 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
194 /* AVX-512 registers 16-23 */
195 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
196 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
197 /* AVX-512 registers 24-31 */
198 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
199 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
200 /* Mask registers */
201 93, 94, 95, 96, 97, 98, 99, 100
204 /* The "default" register map used in 64bit mode. */
206 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
208 /* general regs */
209 0, 1, 2, 3, 4, 5, 6, 7,
210 /* fp regs */
211 33, 34, 35, 36, 37, 38, 39, 40,
212 /* arg, flags, fpsr, frame */
213 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
214 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
215 /* SSE */
216 17, 18, 19, 20, 21, 22, 23, 24,
217 /* MMX */
218 41, 42, 43, 44, 45, 46, 47, 48,
219 /* extended integer registers */
220 8, 9, 10, 11, 12, 13, 14, 15,
221 /* extended SSE registers */
222 25, 26, 27, 28, 29, 30, 31, 32,
223 /* AVX-512 registers 16-23 */
224 67, 68, 69, 70, 71, 72, 73, 74,
225 /* AVX-512 registers 24-31 */
226 75, 76, 77, 78, 79, 80, 81, 82,
227 /* Mask registers */
228 118, 119, 120, 121, 122, 123, 124, 125
231 /* Define the register numbers to be used in Dwarf debugging information.
232 The SVR4 reference port C compiler uses the following register numbers
233 in its Dwarf output code:
234 0 for %eax (gcc regno = 0)
235 1 for %ecx (gcc regno = 2)
236 2 for %edx (gcc regno = 1)
237 3 for %ebx (gcc regno = 3)
238 4 for %esp (gcc regno = 7)
239 5 for %ebp (gcc regno = 6)
240 6 for %esi (gcc regno = 4)
241 7 for %edi (gcc regno = 5)
242 The following three DWARF register numbers are never generated by
243 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
244 believed these numbers have these meanings.
245 8 for %eip (no gcc equivalent)
246 9 for %eflags (gcc regno = 17)
247 10 for %trapno (no gcc equivalent)
248 It is not at all clear how we should number the FP stack registers
249 for the x86 architecture. If the version of SDB on x86/svr4 were
250 a bit less brain dead with respect to floating-point then we would
251 have a precedent to follow with respect to DWARF register numbers
252 for x86 FP registers, but the SDB on x86/svr4 was so completely
253 broken with respect to FP registers that it is hardly worth thinking
254 of it as something to strive for compatibility with.
255 The version of x86/svr4 SDB I had does (partially)
256 seem to believe that DWARF register number 11 is associated with
257 the x86 register %st(0), but that's about all. Higher DWARF
258 register numbers don't seem to be associated with anything in
259 particular, and even for DWARF regno 11, SDB only seemed to under-
260 stand that it should say that a variable lives in %st(0) (when
261 asked via an `=' command) if we said it was in DWARF regno 11,
262 but SDB still printed garbage when asked for the value of the
263 variable in question (via a `/' command).
264 (Also note that the labels SDB printed for various FP stack regs
265 when doing an `x' command were all wrong.)
266 Note that these problems generally don't affect the native SVR4
267 C compiler because it doesn't allow the use of -O with -g and
268 because when it is *not* optimizing, it allocates a memory
269 location for each floating-point variable, and the memory
270 location is what gets described in the DWARF AT_location
271 attribute for the variable in question.
272 Regardless of the severe mental illness of the x86/svr4 SDB, we
273 do something sensible here and we use the following DWARF
274 register numbers. Note that these are all stack-top-relative
275 numbers.
276 11 for %st(0) (gcc regno = 8)
277 12 for %st(1) (gcc regno = 9)
278 13 for %st(2) (gcc regno = 10)
279 14 for %st(3) (gcc regno = 11)
280 15 for %st(4) (gcc regno = 12)
281 16 for %st(5) (gcc regno = 13)
282 17 for %st(6) (gcc regno = 14)
283 18 for %st(7) (gcc regno = 15)
285 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
287 /* general regs */
288 0, 2, 1, 3, 6, 7, 5, 4,
289 /* fp regs */
290 11, 12, 13, 14, 15, 16, 17, 18,
291 /* arg, flags, fpsr, frame */
292 IGNORED_DWARF_REGNUM, 9,
293 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
294 /* SSE registers */
295 21, 22, 23, 24, 25, 26, 27, 28,
296 /* MMX registers */
297 29, 30, 31, 32, 33, 34, 35, 36,
298 /* extended integer registers */
299 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
300 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
301 /* extended sse registers */
302 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
303 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
304 /* AVX-512 registers 16-23 */
305 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
306 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
307 /* AVX-512 registers 24-31 */
308 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
309 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
310 /* Mask registers */
311 93, 94, 95, 96, 97, 98, 99, 100
314 /* Define parameter passing and return registers. */
316 static int const x86_64_int_parameter_registers[6] =
318 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
321 static int const x86_64_ms_abi_int_parameter_registers[4] =
323 CX_REG, DX_REG, R8_REG, R9_REG
326 static int const x86_64_int_return_registers[4] =
328 AX_REG, DX_REG, DI_REG, SI_REG
331 /* Define the structure for the machine field in struct function. */
333 struct GTY(()) stack_local_entry {
334 unsigned short mode;
335 unsigned short n;
336 rtx rtl;
337 struct stack_local_entry *next;
340 /* Which cpu are we scheduling for. */
341 enum attr_cpu ix86_schedule;
343 /* Which cpu are we optimizing for. */
344 enum processor_type ix86_tune;
346 /* Which instruction set architecture to use. */
347 enum processor_type ix86_arch;
349 /* True if processor has SSE prefetch instruction. */
350 unsigned char x86_prefetch_sse;
352 /* Preferred alignment for stack boundary in bits. */
353 unsigned int ix86_preferred_stack_boundary;
355 /* Alignment for incoming stack boundary in bits specified at
356 command line. */
357 unsigned int ix86_user_incoming_stack_boundary;
359 /* Default alignment for incoming stack boundary in bits. */
360 unsigned int ix86_default_incoming_stack_boundary;
362 /* Alignment for incoming stack boundary in bits. */
363 unsigned int ix86_incoming_stack_boundary;
365 /* Calling abi specific va_list type nodes. */
366 tree sysv_va_list_type_node;
367 tree ms_va_list_type_node;
369 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
370 char internal_label_prefix[16];
371 int internal_label_prefix_len;
373 /* Fence to use after loop using movnt. */
374 tree x86_mfence;
 376 /* Register class used for passing a given 64-bit part of the argument.
 377 These represent classes as documented by the psABI, with the exception
 378 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
 379 uses an SFmode or DFmode move instead of a DImode move to avoid reformatting penalties.
 381 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
 382 whenever possible (the upper half contains only padding). */
383 enum x86_64_reg_class
385 X86_64_NO_CLASS,
386 X86_64_INTEGER_CLASS,
387 X86_64_INTEGERSI_CLASS,
388 X86_64_SSE_CLASS,
389 X86_64_SSESF_CLASS,
390 X86_64_SSEDF_CLASS,
391 X86_64_SSEUP_CLASS,
392 X86_64_X87_CLASS,
393 X86_64_X87UP_CLASS,
394 X86_64_COMPLEX_X87_CLASS,
395 X86_64_MEMORY_CLASS
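/* Descriptive note (added, not in the original source): MAX_CLASSES bounds how
   many eightbyte chunks (x86_64_reg_class entries) a single argument may be
   classified into; 8 covers a 64-byte AVX-512 vector. */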
398 #define MAX_CLASSES 8
400 /* Table of constants used by fldpi, fldln2, etc.... */
401 static REAL_VALUE_TYPE ext_80387_constants_table [5];
402 static bool ext_80387_constants_init;
405 static rtx ix86_function_value (const_tree, const_tree, bool);
406 static bool ix86_function_value_regno_p (const unsigned int);
407 static unsigned int ix86_function_arg_boundary (machine_mode,
408 const_tree);
409 static rtx ix86_static_chain (const_tree, bool);
410 static int ix86_function_regparm (const_tree, const_tree);
411 static void ix86_compute_frame_layout (void);
412 static tree ix86_canonical_va_list_type (tree);
413 static unsigned int split_stack_prologue_scratch_regno (void);
414 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
416 static bool ix86_can_inline_p (tree, tree);
417 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
420 /* Whether -mtune= or -march= were specified */
421 int ix86_tune_defaulted;
422 int ix86_arch_specified;
 424 /* Return true if a red-zone is in use. We can't use the red-zone when
 425 there are local indirect jumps, like "indirect_jump" or "tablejump",
 426 which jump to another place in the function, since the "call" in the
 427 indirect thunk pushes the return address onto the stack, destroying the
 428 red-zone.
 430 TODO: If we can reserve the first 2 WORDs of the red-zone, one for PUSH
 431 and another for CALL, we can allow local indirect jumps with an
 432 indirect thunk. */
434 bool
435 ix86_using_red_zone (void)
437 return (TARGET_RED_ZONE
438 && !TARGET_64BIT_MS_ABI
439 && (!cfun->machine->has_local_indirect_jump
440 || cfun->machine->indirect_branch_type == indirect_branch_keep));
 443 /* Return true if profiling code should be emitted before the
 444 prologue; otherwise return false.
 445 Note: For x86 with "hotfix" this is sorried (rejected with a "sorry" diagnostic). */
446 static bool
447 ix86_profile_before_prologue (void)
449 return flag_fentry != 0;
452 /* Update register usage after having seen the compiler flags. */
454 static void
455 ix86_conditional_register_usage (void)
457 int i, c_mask;
 459 /* If there are no caller-saved registers, preserve all registers
 460 except fixed_regs and registers used for the function return value,
 461 since aggregate_value_p checks call_used_regs[regno] on the return
 462 value. */
463 if (cfun && cfun->machine->no_caller_saved_registers)
464 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
465 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
466 call_used_regs[i] = 0;
468 /* For 32-bit targets, disable the REX registers. */
469 if (! TARGET_64BIT)
471 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
472 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
473 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
474 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
475 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
476 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
479 /* See the definition of CALL_USED_REGISTERS in i386.h. */
480 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
482 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
484 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
486 /* Set/reset conditionally defined registers from
487 CALL_USED_REGISTERS initializer. */
488 if (call_used_regs[i] > 1)
489 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
491 /* Calculate registers of CLOBBERED_REGS register set
492 as call used registers from GENERAL_REGS register set. */
493 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
494 && call_used_regs[i])
495 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
498 /* If MMX is disabled, disable the registers. */
499 if (! TARGET_MMX)
500 accessible_reg_set &= ~reg_class_contents[MMX_REGS];
502 /* If SSE is disabled, disable the registers. */
503 if (! TARGET_SSE)
504 accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];
506 /* If the FPU is disabled, disable the registers. */
507 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
508 accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];
510 /* If AVX512F is disabled, disable the registers. */
511 if (! TARGET_AVX512F)
513 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
514 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
516 accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
520 /* Canonicalize a comparison from one we don't have to one we do have. */
522 static void
523 ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
524 bool op0_preserve_value)
 526 /* The order of operands in an x87 ficom compare is forced by the combine
 527 pass in its simplify_comparison () function. A FLOAT operator is treated
 528 as RTX_OBJ with precedence over other operators and is always put in the
 529 first place. Swap the condition and operands to match the ficom instruction. */
530 if (!op0_preserve_value
531 && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
533 enum rtx_code scode = swap_condition ((enum rtx_code) *code);
535 /* We are called only for compares that are split to SAHF instruction.
536 Ensure that we have setcc/jcc insn for the swapped condition. */
537 if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
539 std::swap (*op0, *op1);
540 *code = (int) scode;
546 /* Hook to determine if one function can safely inline another. */
548 static bool
549 ix86_can_inline_p (tree caller, tree callee)
551 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
552 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
 554 /* Changes to these flags can be tolerated for always_inline functions.
 555 Let's hope the user knows what they are doing. */
556 const unsigned HOST_WIDE_INT always_inline_safe_mask
557 = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
558 | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
559 | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
560 | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
561 | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
562 | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
563 | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
566 if (!callee_tree)
567 callee_tree = target_option_default_node;
568 if (!caller_tree)
569 caller_tree = target_option_default_node;
570 if (callee_tree == caller_tree)
571 return true;
573 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
574 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
575 bool ret = false;
576 bool always_inline
577 = (DECL_DISREGARD_INLINE_LIMITS (callee)
578 && lookup_attribute ("always_inline",
579 DECL_ATTRIBUTES (callee)));
581 cgraph_node *callee_node = cgraph_node::get (callee);
 582 /* The callee's ISA options should be a subset of the caller's, i.e. an SSE4
 583 function can inline an SSE2 function, but an SSE2 function can't inline
 584 an SSE4 function. */
585 if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
586 != callee_opts->x_ix86_isa_flags)
587 || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
588 != callee_opts->x_ix86_isa_flags2))
589 ret = false;
591 /* See if we have the same non-isa options. */
592 else if ((!always_inline
593 && caller_opts->x_target_flags != callee_opts->x_target_flags)
594 || (caller_opts->x_target_flags & ~always_inline_safe_mask)
595 != (callee_opts->x_target_flags & ~always_inline_safe_mask))
596 ret = false;
598 /* See if arch, tune, etc. are the same. */
599 else if (caller_opts->arch != callee_opts->arch)
600 ret = false;
602 else if (!always_inline && caller_opts->tune != callee_opts->tune)
603 ret = false;
605 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
 606 /* If the callee doesn't use FP expressions, differences in
 607 ix86_fpmath can be ignored. We are called from FEs
 608 for multi-versioning call optimization, so beware of
 609 ipa_fn_summaries not being available. */
610 && (! ipa_fn_summaries
611 || ipa_fn_summaries->get (callee_node) == NULL
612 || ipa_fn_summaries->get (callee_node)->fp_expressions))
613 ret = false;
615 else if (!always_inline
616 && caller_opts->branch_cost != callee_opts->branch_cost)
617 ret = false;
619 else
620 ret = true;
622 return ret;
625 /* Return true if this goes in large data/bss. */
627 static bool
628 ix86_in_large_data_p (tree exp)
630 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
631 return false;
633 if (exp == NULL_TREE)
634 return false;
636 /* Functions are never large data. */
637 if (TREE_CODE (exp) == FUNCTION_DECL)
638 return false;
640 /* Automatic variables are never large data. */
641 if (VAR_P (exp) && !is_global_var (exp))
642 return false;
644 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
646 const char *section = DECL_SECTION_NAME (exp);
647 if (strcmp (section, ".ldata") == 0
648 || strcmp (section, ".lbss") == 0)
649 return true;
650 return false;
652 else
654 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
 656 /* If this is an incomplete type with size 0, then we can't put it
 657 in data because it might be too big when completed. Also,
 658 int_size_in_bytes returns -1 if the size can vary or is larger than
 659 an integer, in which case it is also safer to assume that it goes in
 660 large data. */
661 if (size <= 0 || size > ix86_section_threshold)
662 return true;
665 return false;
668 /* i386-specific section flag to mark large sections. */
669 #define SECTION_LARGE SECTION_MACH_DEP
671 /* Switch to the appropriate section for output of DECL.
672 DECL is either a `VAR_DECL' node or a constant of some sort.
673 RELOC indicates whether forming the initial value of DECL requires
674 link-time relocations. */
676 ATTRIBUTE_UNUSED static section *
677 x86_64_elf_select_section (tree decl, int reloc,
678 unsigned HOST_WIDE_INT align)
680 if (ix86_in_large_data_p (decl))
682 const char *sname = NULL;
683 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
684 switch (categorize_decl_for_section (decl, reloc))
686 case SECCAT_DATA:
687 sname = ".ldata";
688 break;
689 case SECCAT_DATA_REL:
690 sname = ".ldata.rel";
691 break;
692 case SECCAT_DATA_REL_LOCAL:
693 sname = ".ldata.rel.local";
694 break;
695 case SECCAT_DATA_REL_RO:
696 sname = ".ldata.rel.ro";
697 break;
698 case SECCAT_DATA_REL_RO_LOCAL:
699 sname = ".ldata.rel.ro.local";
700 break;
701 case SECCAT_BSS:
702 sname = ".lbss";
703 flags |= SECTION_BSS;
704 break;
705 case SECCAT_RODATA:
706 case SECCAT_RODATA_MERGE_STR:
707 case SECCAT_RODATA_MERGE_STR_INIT:
708 case SECCAT_RODATA_MERGE_CONST:
709 sname = ".lrodata";
710 flags &= ~SECTION_WRITE;
711 break;
712 case SECCAT_SRODATA:
713 case SECCAT_SDATA:
714 case SECCAT_SBSS:
715 gcc_unreachable ();
716 case SECCAT_TEXT:
717 case SECCAT_TDATA:
718 case SECCAT_TBSS:
 719 /* We don't split these for the medium model. Place them into
 720 default sections and hope for the best. */
721 break;
723 if (sname)
725 /* We might get called with string constants, but get_named_section
726 doesn't like them as they are not DECLs. Also, we need to set
727 flags in that case. */
728 if (!DECL_P (decl))
729 return get_section (sname, flags, NULL);
730 return get_named_section (decl, sname, reloc);
733 return default_elf_select_section (decl, reloc, align);
736 /* Select a set of attributes for section NAME based on the properties
737 of DECL and whether or not RELOC indicates that DECL's initializer
738 might contain runtime relocations. */
740 static unsigned int ATTRIBUTE_UNUSED
741 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
743 unsigned int flags = default_section_type_flags (decl, name, reloc);
745 if (ix86_in_large_data_p (decl))
746 flags |= SECTION_LARGE;
748 if (decl == NULL_TREE
749 && (strcmp (name, ".ldata.rel.ro") == 0
750 || strcmp (name, ".ldata.rel.ro.local") == 0))
751 flags |= SECTION_RELRO;
753 if (strcmp (name, ".lbss") == 0
754 || strncmp (name, ".lbss.", sizeof (".lbss.") - 1) == 0
755 || strncmp (name, ".gnu.linkonce.lb.",
756 sizeof (".gnu.linkonce.lb.") - 1) == 0)
757 flags |= SECTION_BSS;
759 return flags;
762 /* Build up a unique section name, expressed as a
763 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
764 RELOC indicates whether the initial value of EXP requires
765 link-time relocations. */
767 static void ATTRIBUTE_UNUSED
768 x86_64_elf_unique_section (tree decl, int reloc)
770 if (ix86_in_large_data_p (decl))
772 const char *prefix = NULL;
773 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
774 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
776 switch (categorize_decl_for_section (decl, reloc))
778 case SECCAT_DATA:
779 case SECCAT_DATA_REL:
780 case SECCAT_DATA_REL_LOCAL:
781 case SECCAT_DATA_REL_RO:
782 case SECCAT_DATA_REL_RO_LOCAL:
783 prefix = one_only ? ".ld" : ".ldata";
784 break;
785 case SECCAT_BSS:
786 prefix = one_only ? ".lb" : ".lbss";
787 break;
788 case SECCAT_RODATA:
789 case SECCAT_RODATA_MERGE_STR:
790 case SECCAT_RODATA_MERGE_STR_INIT:
791 case SECCAT_RODATA_MERGE_CONST:
792 prefix = one_only ? ".lr" : ".lrodata";
793 break;
794 case SECCAT_SRODATA:
795 case SECCAT_SDATA:
796 case SECCAT_SBSS:
797 gcc_unreachable ();
798 case SECCAT_TEXT:
799 case SECCAT_TDATA:
800 case SECCAT_TBSS:
 801 /* We don't split these for the medium model. Place them into
 802 default sections and hope for the best. */
803 break;
805 if (prefix)
807 const char *name, *linkonce;
808 char *string;
810 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
811 name = targetm.strip_name_encoding (name);
813 /* If we're using one_only, then there needs to be a .gnu.linkonce
814 prefix to the section name. */
815 linkonce = one_only ? ".gnu.linkonce" : "";
817 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
819 set_decl_section_name (decl, string);
820 return;
823 default_unique_section (decl, reloc);
826 #ifdef COMMON_ASM_OP
828 #ifndef LARGECOMM_SECTION_ASM_OP
829 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
830 #endif
 832 /* This says how to output assembler code to declare an
 833 uninitialized external-linkage data object.
 835 For medium-model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP directive for
 836 large objects. */
837 void
838 x86_elf_aligned_decl_common (FILE *file, tree decl,
839 const char *name, unsigned HOST_WIDE_INT size,
840 int align)
842 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
843 && size > (unsigned int)ix86_section_threshold)
845 switch_to_section (get_named_section (decl, ".lbss", 0));
846 fputs (LARGECOMM_SECTION_ASM_OP, file);
848 else
849 fputs (COMMON_ASM_OP, file);
850 assemble_name (file, name);
851 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
852 size, align / BITS_PER_UNIT);
854 #endif
856 /* Utility function for targets to use in implementing
857 ASM_OUTPUT_ALIGNED_BSS. */
859 void
860 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
861 unsigned HOST_WIDE_INT size, int align)
863 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
864 && size > (unsigned int)ix86_section_threshold)
865 switch_to_section (get_named_section (decl, ".lbss", 0));
866 else
867 switch_to_section (bss_section);
868 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
869 #ifdef ASM_DECLARE_OBJECT_NAME
870 last_assemble_variable_decl = decl;
871 ASM_DECLARE_OBJECT_NAME (file, name, decl);
872 #else
873 /* Standard thing is just output label for the object. */
874 ASM_OUTPUT_LABEL (file, name);
875 #endif /* ASM_DECLARE_OBJECT_NAME */
876 ASM_OUTPUT_SKIP (file, size ? size : 1);
879 /* Decide whether we must probe the stack before any space allocation
880 on this target. It's essentially TARGET_STACK_PROBE except when
881 -fstack-check causes the stack to be already probed differently. */
883 bool
884 ix86_target_stack_probe (void)
886 /* Do not probe the stack twice if static stack checking is enabled. */
887 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
888 return false;
890 return TARGET_STACK_PROBE;
893 /* Decide whether we can make a sibling call to a function. DECL is the
894 declaration of the function being targeted by the call and EXP is the
895 CALL_EXPR representing the call. */
897 static bool
898 ix86_function_ok_for_sibcall (tree decl, tree exp)
900 tree type, decl_or_type;
901 rtx a, b;
902 bool bind_global = decl && !targetm.binds_local_p (decl);
904 if (ix86_function_naked (current_function_decl))
905 return false;
907 /* Sibling call isn't OK if there are no caller-saved registers
908 since all registers must be preserved before return. */
909 if (cfun->machine->no_caller_saved_registers)
910 return false;
912 /* If we are generating position-independent code, we cannot sibcall
913 optimize direct calls to global functions, as the PLT requires
914 %ebx be live. (Darwin does not have a PLT.) */
915 if (!TARGET_MACHO
916 && !TARGET_64BIT
917 && flag_pic
918 && flag_plt
919 && bind_global)
920 return false;
922 /* If we need to align the outgoing stack, then sibcalling would
923 unalign the stack, which may break the called function. */
924 if (ix86_minimum_incoming_stack_boundary (true)
925 < PREFERRED_STACK_BOUNDARY)
926 return false;
928 if (decl)
930 decl_or_type = decl;
931 type = TREE_TYPE (decl);
933 else
935 /* We're looking at the CALL_EXPR, we need the type of the function. */
936 type = CALL_EXPR_FN (exp); /* pointer expression */
937 type = TREE_TYPE (type); /* pointer type */
938 type = TREE_TYPE (type); /* function type */
939 decl_or_type = type;
942 /* Check that the return value locations are the same. Like
943 if we are returning floats on the 80387 register stack, we cannot
944 make a sibcall from a function that doesn't return a float to a
945 function that does or, conversely, from a function that does return
946 a float to a function that doesn't; the necessary stack adjustment
947 would not be executed. This is also the place we notice
948 differences in the return value ABI. Note that it is ok for one
949 of the functions to have void return type as long as the return
950 value of the other is passed in a register. */
951 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
952 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
953 cfun->decl, false);
954 if (STACK_REG_P (a) || STACK_REG_P (b))
956 if (!rtx_equal_p (a, b))
957 return false;
959 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
961 else if (!rtx_equal_p (a, b))
962 return false;
964 if (TARGET_64BIT)
966 /* The SYSV ABI has more call-clobbered registers;
967 disallow sibcalls from MS to SYSV. */
968 if (cfun->machine->call_abi == MS_ABI
969 && ix86_function_type_abi (type) == SYSV_ABI)
970 return false;
972 else
974 /* If this call is indirect, we'll need to be able to use a
975 call-clobbered register for the address of the target function.
976 Make sure that all such registers are not used for passing
 977 parameters. Note that DLLIMPORT functions and calls to global
 978 functions via the GOT slot are indirect. */
979 if (!decl
980 || (bind_global && flag_pic && !flag_plt)
981 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
982 || flag_force_indirect_call)
984 /* Check if regparm >= 3 since arg_reg_available is set to
985 false if regparm == 0. If regparm is 1 or 2, there is
986 always a call-clobbered register available.
988 ??? The symbol indirect call doesn't need a call-clobbered
989 register. But we don't know if this is a symbol indirect
990 call or not here. */
991 if (ix86_function_regparm (type, decl) >= 3
992 && !cfun->machine->arg_reg_available)
993 return false;
997 /* Otherwise okay. That also includes certain types of indirect calls. */
998 return true;
1001 /* This function determines from TYPE the calling-convention. */
1003 unsigned int
1004 ix86_get_callcvt (const_tree type)
1006 unsigned int ret = 0;
1007 bool is_stdarg;
1008 tree attrs;
1010 if (TARGET_64BIT)
1011 return IX86_CALLCVT_CDECL;
1013 attrs = TYPE_ATTRIBUTES (type);
1014 if (attrs != NULL_TREE)
1016 if (lookup_attribute ("cdecl", attrs))
1017 ret |= IX86_CALLCVT_CDECL;
1018 else if (lookup_attribute ("stdcall", attrs))
1019 ret |= IX86_CALLCVT_STDCALL;
1020 else if (lookup_attribute ("fastcall", attrs))
1021 ret |= IX86_CALLCVT_FASTCALL;
1022 else if (lookup_attribute ("thiscall", attrs))
1023 ret |= IX86_CALLCVT_THISCALL;
 1025 /* Regparm isn't allowed for thiscall and fastcall. */
1026 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
1028 if (lookup_attribute ("regparm", attrs))
1029 ret |= IX86_CALLCVT_REGPARM;
1030 if (lookup_attribute ("sseregparm", attrs))
1031 ret |= IX86_CALLCVT_SSEREGPARM;
1034 if (IX86_BASE_CALLCVT(ret) != 0)
1035 return ret;
1038 is_stdarg = stdarg_p (type);
1039 if (TARGET_RTD && !is_stdarg)
1040 return IX86_CALLCVT_STDCALL | ret;
1042 if (ret != 0
1043 || is_stdarg
1044 || TREE_CODE (type) != METHOD_TYPE
1045 || ix86_function_type_abi (type) != MS_ABI)
1046 return IX86_CALLCVT_CDECL | ret;
1048 return IX86_CALLCVT_THISCALL;
1051 /* Return 0 if the attributes for two types are incompatible, 1 if they
1052 are compatible, and 2 if they are nearly compatible (which causes a
1053 warning to be generated). */
1055 static int
1056 ix86_comp_type_attributes (const_tree type1, const_tree type2)
1058 unsigned int ccvt1, ccvt2;
1060 if (TREE_CODE (type1) != FUNCTION_TYPE
1061 && TREE_CODE (type1) != METHOD_TYPE)
1062 return 1;
1064 ccvt1 = ix86_get_callcvt (type1);
1065 ccvt2 = ix86_get_callcvt (type2);
1066 if (ccvt1 != ccvt2)
1067 return 0;
1068 if (ix86_function_regparm (type1, NULL)
1069 != ix86_function_regparm (type2, NULL))
1070 return 0;
1072 return 1;
1075 /* Return the regparm value for a function with the indicated TYPE and DECL.
1076 DECL may be NULL when calling function indirectly
1077 or considering a libcall. */
1079 static int
1080 ix86_function_regparm (const_tree type, const_tree decl)
1082 tree attr;
1083 int regparm;
1084 unsigned int ccvt;
1086 if (TARGET_64BIT)
1087 return (ix86_function_type_abi (type) == SYSV_ABI
1088 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
1089 ccvt = ix86_get_callcvt (type);
1090 regparm = ix86_regparm;
1092 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
1094 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1095 if (attr)
1097 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1098 return regparm;
1101 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1102 return 2;
1103 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1104 return 1;
1106 /* Use register calling convention for local functions when possible. */
1107 if (decl
1108 && TREE_CODE (decl) == FUNCTION_DECL)
1110 cgraph_node *target = cgraph_node::get (decl);
1111 if (target)
1112 target = target->function_symbol ();
 1114 /* Caller and callee must agree on the calling convention, so
 1115 checking just "optimize" here would mean that with
 1116 __attribute__((optimize (...))) the caller could use the regparm convention
 1117 and the callee not, or vice versa. Instead look at whether the callee
 1118 is optimized or not. */
1119 if (target && opt_for_fn (target->decl, optimize)
1120 && !(profile_flag && !flag_fentry))
1122 if (target->local && target->can_change_signature)
1124 int local_regparm, globals = 0, regno;
1126 /* Make sure no regparm register is taken by a
1127 fixed register variable. */
1128 for (local_regparm = 0; local_regparm < REGPARM_MAX;
1129 local_regparm++)
1130 if (fixed_regs[local_regparm])
1131 break;
1133 /* We don't want to use regparm(3) for nested functions as
1134 these use a static chain pointer in the third argument. */
1135 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
1136 local_regparm = 2;
1138 /* Save a register for the split stack. */
1139 if (flag_split_stack)
1141 if (local_regparm == 3)
1142 local_regparm = 2;
1143 else if (local_regparm == 2
1144 && DECL_STATIC_CHAIN (target->decl))
1145 local_regparm = 1;
 1148 /* Each fixed register usage increases register pressure,
 1149 so fewer registers should be used for argument passing.
 1150 This functionality can be overridden by an explicit
 1151 regparm value. */
1152 for (regno = AX_REG; regno <= DI_REG; regno++)
1153 if (fixed_regs[regno])
1154 globals++;
1156 local_regparm
1157 = globals < local_regparm ? local_regparm - globals : 0;
1159 if (local_regparm > regparm)
1160 regparm = local_regparm;
1165 return regparm;
 1168 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
 1169 DFmode (2) arguments in SSE registers for a function with the
 1170 indicated TYPE and DECL. DECL may be NULL when calling a function
 1171 indirectly or considering a libcall. Return -1 if any FP parameter
 1172 should be rejected with an error. This is used in situations where we imply the SSE
 1173 calling convention but the function is called from another function with
 1174 SSE disabled. Otherwise return 0. */
1176 static int
1177 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
1179 gcc_assert (!TARGET_64BIT);
1181 /* Use SSE registers to pass SFmode and DFmode arguments if requested
1182 by the sseregparm attribute. */
1183 if (TARGET_SSEREGPARM
1184 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
1186 if (!TARGET_SSE)
1188 if (warn)
1190 if (decl)
1191 error ("calling %qD with attribute sseregparm without "
1192 "SSE/SSE2 enabled", decl);
1193 else
1194 error ("calling %qT with attribute sseregparm without "
1195 "SSE/SSE2 enabled", type);
1197 return 0;
1200 return 2;
1203 if (!decl)
1204 return 0;
1206 cgraph_node *target = cgraph_node::get (decl);
1207 if (target)
1208 target = target->function_symbol ();
1210 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1211 (and DFmode for SSE2) arguments in SSE registers. */
1212 if (target
1213 /* TARGET_SSE_MATH */
1214 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
1215 && opt_for_fn (target->decl, optimize)
1216 && !(profile_flag && !flag_fentry))
1218 if (target->local && target->can_change_signature)
 1220 /* Refuse to produce wrong code when a local function with SSE enabled
 1221 is called from an SSE-disabled function.
 1222 FIXME: We need a way to detect these cases across ltrans partitions
 1223 and avoid using SSE calling conventions on local functions called
 1224 from functions with SSE disabled. For now at least delay the
 1225 warning until we know we are going to produce wrong code.
 1226 See PR66047. */
1227 if (!TARGET_SSE && warn)
1228 return -1;
1229 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
1230 ->x_ix86_isa_flags) ? 2 : 1;
1234 return 0;
1237 /* Return true if EAX is live at the start of the function. Used by
1238 ix86_expand_prologue to determine if we need special help before
1239 calling allocate_stack_worker. */
1241 static bool
1242 ix86_eax_live_at_start_p (void)
1244 /* Cheat. Don't bother working forward from ix86_function_regparm
1245 to the function type to whether an actual argument is located in
1246 eax. Instead just look at cfg info, which is still close enough
1247 to correct at this point. This gives false positives for broken
1248 functions that might use uninitialized data that happens to be
1249 allocated in eax, but who cares? */
1250 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
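/* Descriptive comment (added): return true if the callee should leave the
   hidden aggregate-return pointer argument on the stack rather than popping
   it on return, for a function of type FNTYPE. */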
1253 static bool
1254 ix86_keep_aggregate_return_pointer (tree fntype)
1256 tree attr;
1258 if (!TARGET_64BIT)
1260 attr = lookup_attribute ("callee_pop_aggregate_return",
1261 TYPE_ATTRIBUTES (fntype));
1262 if (attr)
1263 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
1265 /* For 32-bit MS-ABI the default is to keep aggregate
1266 return pointer. */
1267 if (ix86_function_type_abi (fntype) == MS_ABI)
1268 return true;
1270 return KEEP_AGGREGATE_RETURN_POINTER != 0;
1273 /* Value is the number of bytes of arguments automatically
1274 popped when returning from a subroutine call.
1275 FUNDECL is the declaration node of the function (as a tree),
1276 FUNTYPE is the data type of the function (as a tree),
1277 or for a library call it is an identifier node for the subroutine name.
1278 SIZE is the number of bytes of arguments passed on the stack.
1280 On the 80386, the RTD insn may be used to pop them if the number
1281 of args is fixed, but if the number is variable then the caller
1282 must pop them all. RTD can't be used for library calls now
1283 because the library is compiled with the Unix compiler.
1284 Use of RTD is a selectable option, since it is incompatible with
1285 standard Unix calling sequences. If the option is not selected,
1286 the caller must always pop the args.
1288 The attribute stdcall is equivalent to RTD on a per module basis. */
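/* Illustrative example (added, not from the original source): on 32-bit, a
   non-variadic function declared as
       int __attribute__ ((stdcall)) f (int a, int b);
   pops its own 8 bytes of arguments ("ret $8"), so this hook returns SIZE;
   with the default cdecl convention the caller pops them and the hook
   returns 0. */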
1290 static poly_int64
1291 ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
1293 unsigned int ccvt;
1295 /* None of the 64-bit ABIs pop arguments. */
1296 if (TARGET_64BIT)
1297 return 0;
1299 ccvt = ix86_get_callcvt (funtype);
1301 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
1302 | IX86_CALLCVT_THISCALL)) != 0
1303 && ! stdarg_p (funtype))
1304 return size;
1306 /* Lose any fake structure return argument if it is passed on the stack. */
1307 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1308 && !ix86_keep_aggregate_return_pointer (funtype))
1310 int nregs = ix86_function_regparm (funtype, fundecl);
1311 if (nregs == 0)
1312 return GET_MODE_SIZE (Pmode);
1315 return 0;
1318 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1320 static bool
1321 ix86_legitimate_combined_insn (rtx_insn *insn)
1323 int i;
 1325 /* Check operand constraints in case hard registers were propagated
 1326 into the insn pattern. This check prevents the combine pass from
 1327 generating insn patterns with invalid hard register operands.
 1328 These invalid insns can eventually cause reload to error out
 1329 with a spill failure. See also PRs 46829 and 46843. */
1331 gcc_assert (INSN_CODE (insn) >= 0);
1333 extract_insn (insn);
1334 preprocess_constraints (insn);
1336 int n_operands = recog_data.n_operands;
1337 int n_alternatives = recog_data.n_alternatives;
1338 for (i = 0; i < n_operands; i++)
1340 rtx op = recog_data.operand[i];
1341 machine_mode mode = GET_MODE (op);
1342 const operand_alternative *op_alt;
1343 int offset = 0;
1344 bool win;
1345 int j;
1347 /* A unary operator may be accepted by the predicate, but it
1348 is irrelevant for matching constraints. */
1349 if (UNARY_P (op))
1350 op = XEXP (op, 0);
1352 if (SUBREG_P (op))
1354 if (REG_P (SUBREG_REG (op))
1355 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
1356 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
1357 GET_MODE (SUBREG_REG (op)),
1358 SUBREG_BYTE (op),
1359 GET_MODE (op));
1360 op = SUBREG_REG (op);
1363 if (!(REG_P (op) && HARD_REGISTER_P (op)))
1364 continue;
1366 op_alt = recog_op_alt;
1368 /* Operand has no constraints, anything is OK. */
1369 win = !n_alternatives;
1371 alternative_mask preferred = get_preferred_alternatives (insn);
1372 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
1374 if (!TEST_BIT (preferred, j))
1375 continue;
1376 if (op_alt[i].anything_ok
1377 || (op_alt[i].matches != -1
1378 && operands_match_p
1379 (recog_data.operand[i],
1380 recog_data.operand[op_alt[i].matches]))
1381 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
1383 win = true;
1384 break;
1388 if (!win)
1389 return false;
1392 return true;
1395 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1397 static unsigned HOST_WIDE_INT
1398 ix86_asan_shadow_offset (void)
1400 return SUBTARGET_SHADOW_OFFSET;
1403 /* Argument support functions. */
1405 /* Return true when register may be used to pass function parameters. */
1406 bool
1407 ix86_function_arg_regno_p (int regno)
1409 int i;
1410 enum calling_abi call_abi;
1411 const int *parm_regs;
1413 if (!TARGET_64BIT)
1415 if (TARGET_MACHO)
1416 return (regno < REGPARM_MAX
1417 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1418 else
1419 return (regno < REGPARM_MAX
1420 || (TARGET_MMX && MMX_REGNO_P (regno)
1421 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
1422 || (TARGET_SSE && SSE_REGNO_P (regno)
1423 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
1426 if (TARGET_SSE && SSE_REGNO_P (regno)
1427 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
1428 return true;
1430 /* TODO: The function should depend on current function ABI but
1431 builtins.c would need updating then. Therefore we use the
1432 default ABI. */
1433 call_abi = ix86_cfun_abi ();
1435 /* RAX is used as hidden argument to va_arg functions. */
1436 if (call_abi == SYSV_ABI && regno == AX_REG)
1437 return true;
1439 if (call_abi == MS_ABI)
1440 parm_regs = x86_64_ms_abi_int_parameter_registers;
1441 else
1442 parm_regs = x86_64_int_parameter_registers;
1444 for (i = 0; i < (call_abi == MS_ABI
1445 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
1446 if (regno == parm_regs[i])
1447 return true;
1448 return false;
 1451 /* Return true if we do not know how to pass ARG solely in registers. */
1453 static bool
1454 ix86_must_pass_in_stack (const function_arg_info &arg)
1456 if (must_pass_in_stack_var_size_or_pad (arg))
1457 return true;
1459 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1460 The layout_type routine is crafty and tries to trick us into passing
1461 currently unsupported vector types on the stack by using TImode. */
1462 return (!TARGET_64BIT && arg.mode == TImode
1463 && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
 1466 /* Return the size, in bytes, of the area reserved for arguments passed
 1467 in registers for the function represented by FNDECL, depending on the
 1468 ABI used. */
 1469 int
 1470 ix86_reg_parm_stack_space (const_tree fndecl)
1472 enum calling_abi call_abi = SYSV_ABI;
1473 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1474 call_abi = ix86_function_abi (fndecl);
1475 else
1476 call_abi = ix86_function_type_abi (fndecl);
1477 if (TARGET_64BIT && call_abi == MS_ABI)
1478 return 32;
1479 return 0;
1482 /* We add this as a workaround in order to use libc_has_function
1483 hook in i386.md. */
1484 bool
1485 ix86_libc_has_function (enum function_class fn_class)
1487 return targetm.libc_has_function (fn_class);
 1490 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE,
 1491 specifying the call ABI used. */
1492 enum calling_abi
1493 ix86_function_type_abi (const_tree fntype)
1495 enum calling_abi abi = ix86_abi;
1497 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
1498 return abi;
1500 if (abi == SYSV_ABI
1501 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
1503 static int warned;
1504 if (TARGET_X32 && !warned)
1506 error ("X32 does not support %<ms_abi%> attribute");
1507 warned = 1;
1510 abi = MS_ABI;
1512 else if (abi == MS_ABI
1513 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
1514 abi = SYSV_ABI;
1516 return abi;
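/* Descriptive comment (added): return the calling ABI (SYSV_ABI or MS_ABI)
   used by FNDECL, or the default ix86_abi when FNDECL is NULL. */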
1519 enum calling_abi
1520 ix86_function_abi (const_tree fndecl)
1522 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
 1525 /* Return SYSV_ABI or MS_ABI, depending on cfun,
 1526 specifying the call ABI used. */
1527 enum calling_abi
1528 ix86_cfun_abi (void)
1530 return cfun ? cfun->machine->call_abi : ix86_abi;
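/* Descriptive comment (added): return true if FN carries the ms_hook_prologue
   attribute; using the attribute on a nested function is diagnosed as an error. */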
1533 bool
1534 ix86_function_ms_hook_prologue (const_tree fn)
1536 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
1538 if (decl_function_context (fn) != NULL_TREE)
1539 error_at (DECL_SOURCE_LOCATION (fn),
1540 "%<ms_hook_prologue%> attribute is not compatible "
1541 "with nested function");
1542 else
1543 return true;
1545 return false;
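/* Descriptive comment (added): return true if FN is declared with the
   "naked" attribute. */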
1548 bool
1549 ix86_function_naked (const_tree fn)
1551 if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
1552 return true;
1554 return false;
1557 /* Write the extra assembler code needed to declare a function properly. */
1559 void
1560 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
1561 tree decl)
1563 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
1565 if (cfun)
1566 cfun->machine->function_label_emitted = true;
1568 if (is_ms_hook)
1570 int i, filler_count = (TARGET_64BIT ? 32 : 16);
1571 unsigned int filler_cc = 0xcccccccc;
1573 for (i = 0; i < filler_count; i += 4)
1574 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
1577 #ifdef SUBTARGET_ASM_UNWIND_INIT
1578 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
1579 #endif
1581 ASM_OUTPUT_LABEL (asm_out_file, fname);
1583 /* Output magic byte marker, if hot-patch attribute is set. */
1584 if (is_ms_hook)
1586 if (TARGET_64BIT)
1588 /* leaq [%rsp + 0], %rsp */
1589 fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
1590 asm_out_file);
1592 else
1594 /* movl.s %edi, %edi
1595 push %ebp
1596 movl.s %esp, %ebp */
1597 fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file);
 1602 /* Implementation of the call ABI switching target hook. The call
 1603 register sets specific to FNDECL are set up. See also
 1604 ix86_conditional_register_usage for more details. */
1605 void
1606 ix86_call_abi_override (const_tree fndecl)
1608 cfun->machine->call_abi = ix86_function_abi (fndecl);
 1611 /* Return true if a pseudo register should be created and used to hold
 1612 the GOT address for PIC code. */
1613 bool
1614 ix86_use_pseudo_pic_reg (void)
1616 if ((TARGET_64BIT
1617 && (ix86_cmodel == CM_SMALL_PIC
1618 || TARGET_PECOFF))
1619 || !flag_pic)
1620 return false;
1621 return true;
1624 /* Initialize large model PIC register. */
1626 static void
1627 ix86_init_large_pic_reg (unsigned int tmp_regno)
1629 rtx_code_label *label;
1630 rtx tmp_reg;
1632 gcc_assert (Pmode == DImode);
1633 label = gen_label_rtx ();
1634 emit_label (label);
1635 LABEL_PRESERVE_P (label) = 1;
1636 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
1637 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
1638 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
1639 label));
1640 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
1641 emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
1642 const char *name = LABEL_NAME (label);
1643 PUT_CODE (label, NOTE);
1644 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
1645 NOTE_DELETED_LABEL_NAME (label) = name;
1648 /* Create and initialize PIC register if required. */
1649 static void
1650 ix86_init_pic_reg (void)
1652 edge entry_edge;
1653 rtx_insn *seq;
1655 if (!ix86_use_pseudo_pic_reg ())
1656 return;
1658 start_sequence ();
1660 if (TARGET_64BIT)
1662 if (ix86_cmodel == CM_LARGE_PIC)
1663 ix86_init_large_pic_reg (R11_REG);
1664 else
1665 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
1667 else
 1669 /* If there is a future mcount call in the function it is more profitable
 1670 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
1671 rtx reg = crtl->profile
1672 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
1673 : pic_offset_table_rtx;
1674 rtx_insn *insn = emit_insn (gen_set_got (reg));
1675 RTX_FRAME_RELATED_P (insn) = 1;
1676 if (crtl->profile)
1677 emit_move_insn (pic_offset_table_rtx, reg);
1678 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
1681 seq = get_insns ();
1682 end_sequence ();
1684 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1685 insert_insn_on_edge (seq, entry_edge);
1686 commit_one_edge_insertion (entry_edge);
1689 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1690 for a call to a function whose data type is FNTYPE.
1691 For a library call, FNTYPE is 0. */
1693 void
1694 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1695 tree fntype, /* tree ptr for function decl */
1696 rtx libname, /* SYMBOL_REF of library name or 0 */
1697 tree fndecl,
1698 int caller)
1700 struct cgraph_node *local_info_node = NULL;
1701 struct cgraph_node *target = NULL;
1703 memset (cum, 0, sizeof (*cum));
1705 if (fndecl)
1707 target = cgraph_node::get (fndecl);
1708 if (target)
1710 target = target->function_symbol ();
1711 local_info_node = cgraph_node::local_info_node (target->decl);
1712 cum->call_abi = ix86_function_abi (target->decl);
1714 else
1715 cum->call_abi = ix86_function_abi (fndecl);
1717 else
1718 cum->call_abi = ix86_function_type_abi (fntype);
1720 cum->caller = caller;
1722 /* Set up the number of registers to use for passing arguments. */
1723 cum->nregs = ix86_regparm;
1724 if (TARGET_64BIT)
1726 cum->nregs = (cum->call_abi == SYSV_ABI
1727 ? X86_64_REGPARM_MAX
1728 : X86_64_MS_REGPARM_MAX);
1730 if (TARGET_SSE)
1732 cum->sse_nregs = SSE_REGPARM_MAX;
1733 if (TARGET_64BIT)
1735 cum->sse_nregs = (cum->call_abi == SYSV_ABI
1736 ? X86_64_SSE_REGPARM_MAX
1737 : X86_64_MS_SSE_REGPARM_MAX);
1740 if (TARGET_MMX)
1741 cum->mmx_nregs = MMX_REGPARM_MAX;
1742 cum->warn_avx512f = true;
1743 cum->warn_avx = true;
1744 cum->warn_sse = true;
1745 cum->warn_mmx = true;
 1747 /* Because types might mismatch between caller and callee, we need to
 1748 use the actual type of the function for local calls.
 1749 FIXME: cgraph_analyze can be told to actually record if a function uses
 1750 va_start, so for local functions maybe_vaarg can be made aggressive,
 1751 helping K&R code.
 1752 FIXME: once the type system is fixed, we won't need this code anymore. */
1753 if (local_info_node && local_info_node->local
1754 && local_info_node->can_change_signature)
1755 fntype = TREE_TYPE (target->decl);
1756 cum->stdarg = stdarg_p (fntype);
1757 cum->maybe_vaarg = (fntype
1758 ? (!prototype_p (fntype) || stdarg_p (fntype))
1759 : !libname);
1761 cum->decl = fndecl;
1763 cum->warn_empty = !warn_abi || cum->stdarg;
1764 if (!cum->warn_empty && fntype)
1766 function_args_iterator iter;
1767 tree argtype;
1768 bool seen_empty_type = false;
1769 FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
1771 if (argtype == error_mark_node || VOID_TYPE_P (argtype))
1772 break;
1773 if (TYPE_EMPTY_P (argtype))
1774 seen_empty_type = true;
1775 else if (seen_empty_type)
1777 cum->warn_empty = true;
1778 break;
1783 if (!TARGET_64BIT)
1785 /* If there are variable arguments, then we won't pass anything
1786 in registers in 32-bit mode. */
1787 if (stdarg_p (fntype))
1789 cum->nregs = 0;
 1790 /* Since in 32-bit mode variable arguments are always passed on the
 1791 stack, there is a scratch register available for an indirect
 1792 sibcall. */
1793 cfun->machine->arg_reg_available = true;
1794 cum->sse_nregs = 0;
1795 cum->mmx_nregs = 0;
1796 cum->warn_avx512f = false;
1797 cum->warn_avx = false;
1798 cum->warn_sse = false;
1799 cum->warn_mmx = false;
1800 return;
1803 /* Use ecx and edx registers if function has fastcall attribute,
1804 else look for regparm information. */
1805 if (fntype)
1807 unsigned int ccvt = ix86_get_callcvt (fntype);
1808 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1810 cum->nregs = 1;
1811 cum->fastcall = 1; /* Same first register as in fastcall. */
1813 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1815 cum->nregs = 2;
1816 cum->fastcall = 1;
1818 else
1819 cum->nregs = ix86_function_regparm (fntype, fndecl);
1822 /* Set up the number of SSE registers used for passing SFmode
1823 and DFmode arguments. Warn for mismatching ABI. */
1824 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
1827 cfun->machine->arg_reg_available = (cum->nregs > 0);
1830 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
1831 But in the case of vector types, it is some vector mode.
1833 When we have only some of our vector isa extensions enabled, then there
1834 are some modes for which vector_mode_supported_p is false. For these
1835 modes, the generic vector support in gcc will choose some non-vector mode
1836 in order to implement the type. By computing the natural mode, we'll
1837 select the proper ABI location for the operand and not depend on whatever
1838 the middle-end decides to do with these vector types.
1840 The middle-end can't deal with vector types larger than 16 bytes. In
1841 that case, we return the original mode and warn about the ABI change if
1842 CUM isn't NULL.
1844 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
1845 available for the function return value. */
1847 static machine_mode
1848 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
1849 bool in_return)
1851 machine_mode mode = TYPE_MODE (type);
1853 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
1855 HOST_WIDE_INT size = int_size_in_bytes (type);
1856 if ((size == 8 || size == 16 || size == 32 || size == 64)
1857 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
1858 && TYPE_VECTOR_SUBPARTS (type) > 1)
1860 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
1862 /* There are no XFmode vector modes. */
1863 if (innermode == XFmode)
1864 return mode;
1866 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
1867 mode = MIN_MODE_VECTOR_FLOAT;
1868 else
1869 mode = MIN_MODE_VECTOR_INT;
1871 /* Get the mode which has this inner mode and number of units. */
1872 FOR_EACH_MODE_FROM (mode, mode)
1873 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
1874 && GET_MODE_INNER (mode) == innermode)
1876 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
1878 static bool warnedavx512f;
1879 static bool warnedavx512f_ret;
1881 if (cum && cum->warn_avx512f && !warnedavx512f)
1883 if (warning (OPT_Wpsabi, "AVX512F vector argument "
1884 "without AVX512F enabled changes the ABI"))
1885 warnedavx512f = true;
1887 else if (in_return && !warnedavx512f_ret)
1889 if (warning (OPT_Wpsabi, "AVX512F vector return "
1890 "without AVX512F enabled changes the ABI"))
1891 warnedavx512f_ret = true;
1894 return TYPE_MODE (type);
1896 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
1898 static bool warnedavx;
1899 static bool warnedavx_ret;
1901 if (cum && cum->warn_avx && !warnedavx)
1903 if (warning (OPT_Wpsabi, "AVX vector argument "
1904 "without AVX enabled changes the ABI"))
1905 warnedavx = true;
1907 else if (in_return && !warnedavx_ret)
1909 if (warning (OPT_Wpsabi, "AVX vector return "
1910 "without AVX enabled changes the ABI"))
1911 warnedavx_ret = true;
1914 return TYPE_MODE (type);
1916 else if (((size == 8 && TARGET_64BIT) || size == 16)
1917 && !TARGET_SSE
1918 && !TARGET_IAMCU)
1920 static bool warnedsse;
1921 static bool warnedsse_ret;
1923 if (cum && cum->warn_sse && !warnedsse)
1925 if (warning (OPT_Wpsabi, "SSE vector argument "
1926 "without SSE enabled changes the ABI"))
1927 warnedsse = true;
1929 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
1931 if (warning (OPT_Wpsabi, "SSE vector return "
1932 "without SSE enabled changes the ABI"))
1933 warnedsse_ret = true;
1936 else if ((size == 8 && !TARGET_64BIT)
1937 && (!cfun
1938 || cfun->machine->func_type == TYPE_NORMAL)
1939 && !TARGET_MMX
1940 && !TARGET_IAMCU)
1942 static bool warnedmmx;
1943 static bool warnedmmx_ret;
1945 if (cum && cum->warn_mmx && !warnedmmx)
1947 if (warning (OPT_Wpsabi, "MMX vector argument "
1948 "without MMX enabled changes the ABI"))
1949 warnedmmx = true;
1951 else if (in_return && !warnedmmx_ret)
1953 if (warning (OPT_Wpsabi, "MMX vector return "
1954 "without MMX enabled changes the ABI"))
1955 warnedmmx_ret = true;
1958 return mode;
1961 gcc_unreachable ();
1965 return mode;
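/* An illustrative example (the typedef is invented): with only SSE2
   enabled, a GNU vector type such as

       typedef int v8si __attribute__ ((vector_size (32)));

   gets a non-vector TYPE_MODE from the middle-end, but the loop above
   still finds V8SImode as its natural mode.  Because TARGET_AVX is off,
   the function emits the -Wpsabi warning for an argument of this type
   and falls back to TYPE_MODE for the actual passing decision.  */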
1968 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
1969 this may not agree with the mode that the type system has chosen for the
1970 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
1971 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
1973 static rtx
1974 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
1975 unsigned int regno)
1977 rtx tmp;
1979 if (orig_mode != BLKmode)
1980 tmp = gen_rtx_REG (orig_mode, regno);
1981 else
1983 tmp = gen_rtx_REG (mode, regno);
1984 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
1985 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
1988 return tmp;
1991 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1992 of this code is to classify each 8bytes of incoming argument by the register
1993 class and assign registers accordingly. */
1995 /* Return the union class of CLASS1 and CLASS2.
1996 See the x86-64 PS ABI for details. */
1998 static enum x86_64_reg_class
1999 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2001 /* Rule #1: If both classes are equal, this is the resulting class. */
2002 if (class1 == class2)
2003 return class1;
2005 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2006 the other class. */
2007 if (class1 == X86_64_NO_CLASS)
2008 return class2;
2009 if (class2 == X86_64_NO_CLASS)
2010 return class1;
2012 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2013 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2014 return X86_64_MEMORY_CLASS;
2016 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2017 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2018 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2019 return X86_64_INTEGERSI_CLASS;
2020 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2021 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2022 return X86_64_INTEGER_CLASS;
2024 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2025 MEMORY is used. */
2026 if (class1 == X86_64_X87_CLASS
2027 || class1 == X86_64_X87UP_CLASS
2028 || class1 == X86_64_COMPLEX_X87_CLASS
2029 || class2 == X86_64_X87_CLASS
2030 || class2 == X86_64_X87UP_CLASS
2031 || class2 == X86_64_COMPLEX_X87_CLASS)
2032 return X86_64_MEMORY_CLASS;
2034 /* Rule #6: Otherwise class SSE is used. */
2035 return X86_64_SSE_CLASS;
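/* An illustrative example (the union is invented): for

       union u { double d; long l; };

   the single eightbyte is classified once per member, giving
   X86_64_SSEDF_CLASS for the double and X86_64_INTEGER_CLASS for the
   long; rule #4 above merges the two into X86_64_INTEGER_CLASS, so the
   union is passed in a general-purpose register.  */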
2038 /* Classify the argument of type TYPE and mode MODE.
2039 CLASSES will be filled by the register class used to pass each word
2040 of the operand. The number of words is returned. In case the parameter
2041 should be passed in memory, 0 is returned. As a special case for zero
2042 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2044 BIT_OFFSET is used internally for handling records; it specifies the
2045 offset in bits modulo 512 to avoid overflow cases.
2047 See the x86-64 PS ABI for details. */
2050 static int
2051 classify_argument (machine_mode mode, const_tree type,
2052 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2054 HOST_WIDE_INT bytes
2055 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2056 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
2058 /* Variable sized entities are always passed/returned in memory. */
2059 if (bytes < 0)
2060 return 0;
2062 if (mode != VOIDmode)
2064 /* The value of "named" doesn't matter. */
2065 function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
2066 if (targetm.calls.must_pass_in_stack (arg))
2067 return 0;
2070 if (type && AGGREGATE_TYPE_P (type))
2072 int i;
2073 tree field;
2074 enum x86_64_reg_class subclasses[MAX_CLASSES];
2076 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2077 if (bytes > 64)
2078 return 0;
2080 for (i = 0; i < words; i++)
2081 classes[i] = X86_64_NO_CLASS;
2083 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
2084 signal the memory class, so handle them as a special case. */
2085 if (!words)
2087 classes[0] = X86_64_NO_CLASS;
2088 return 1;
2091 /* Classify each field of record and merge classes. */
2092 switch (TREE_CODE (type))
2094 case RECORD_TYPE:
2095 /* And now merge the fields of structure. */
2096 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2098 if (TREE_CODE (field) == FIELD_DECL)
2100 int num;
2102 if (TREE_TYPE (field) == error_mark_node)
2103 continue;
2105 /* Bitfields are always classified as integer. Handle them
2106 early, since later code would consider them to be
2107 misaligned integers. */
2108 if (DECL_BIT_FIELD (field))
2110 for (i = (int_bit_position (field)
2111 + (bit_offset % 64)) / 8 / 8;
2112 i < ((int_bit_position (field) + (bit_offset % 64))
2113 + tree_to_shwi (DECL_SIZE (field))
2114 + 63) / 8 / 8; i++)
2115 classes[i]
2116 = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
2118 else
2120 int pos;
2122 type = TREE_TYPE (field);
2124 /* Flexible array member is ignored. */
2125 if (TYPE_MODE (type) == BLKmode
2126 && TREE_CODE (type) == ARRAY_TYPE
2127 && TYPE_SIZE (type) == NULL_TREE
2128 && TYPE_DOMAIN (type) != NULL_TREE
2129 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
2130 == NULL_TREE))
2132 static bool warned;
2134 if (!warned && warn_psabi)
2136 warned = true;
2137 inform (input_location,
2138 "the ABI of passing struct with"
2139 " a flexible array member has"
2140 " changed in GCC 4.4");
2142 continue;
2144 num = classify_argument (TYPE_MODE (type), type,
2145 subclasses,
2146 (int_bit_position (field)
2147 + bit_offset) % 512);
2148 if (!num)
2149 return 0;
2150 pos = (int_bit_position (field)
2151 + (bit_offset % 64)) / 8 / 8;
2152 for (i = 0; i < num && (i + pos) < words; i++)
2153 classes[i + pos]
2154 = merge_classes (subclasses[i], classes[i + pos]);
2158 break;
2160 case ARRAY_TYPE:
2161 /* Arrays are handled as small records. */
2163 int num;
2164 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2165 TREE_TYPE (type), subclasses, bit_offset);
2166 if (!num)
2167 return 0;
2169 /* The partial classes are now full classes. */
2170 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2171 subclasses[0] = X86_64_SSE_CLASS;
2172 if (subclasses[0] == X86_64_INTEGERSI_CLASS
2173 && !((bit_offset % 64) == 0 && bytes == 4))
2174 subclasses[0] = X86_64_INTEGER_CLASS;
2176 for (i = 0; i < words; i++)
2177 classes[i] = subclasses[i % num];
2179 break;
2181 case UNION_TYPE:
2182 case QUAL_UNION_TYPE:
2183 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
2185 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2187 if (TREE_CODE (field) == FIELD_DECL)
2189 int num;
2191 if (TREE_TYPE (field) == error_mark_node)
2192 continue;
2194 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2195 TREE_TYPE (field), subclasses,
2196 bit_offset);
2197 if (!num)
2198 return 0;
2199 for (i = 0; i < num && i < words; i++)
2200 classes[i] = merge_classes (subclasses[i], classes[i]);
2203 break;
2205 default:
2206 gcc_unreachable ();
2209 if (words > 2)
2211 /* When size > 16 bytes, if the first one isn't
2212 X86_64_SSE_CLASS or any other ones aren't
2213 X86_64_SSEUP_CLASS, everything should be passed in
2214 memory. */
2215 if (classes[0] != X86_64_SSE_CLASS)
2216 return 0;
2218 for (i = 1; i < words; i++)
2219 if (classes[i] != X86_64_SSEUP_CLASS)
2220 return 0;
2223 /* Final merger cleanup. */
2224 for (i = 0; i < words; i++)
2226 /* If one class is MEMORY, everything should be passed in
2227 memory. */
2228 if (classes[i] == X86_64_MEMORY_CLASS)
2229 return 0;
2231 /* The X86_64_SSEUP_CLASS should be always preceded by
2232 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2233 if (classes[i] == X86_64_SSEUP_CLASS
2234 && classes[i - 1] != X86_64_SSE_CLASS
2235 && classes[i - 1] != X86_64_SSEUP_CLASS)
2237 /* The first one should never be X86_64_SSEUP_CLASS. */
2238 gcc_assert (i != 0);
2239 classes[i] = X86_64_SSE_CLASS;
2242 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2243 everything should be passed in memory. */
2244 if (classes[i] == X86_64_X87UP_CLASS
2245 && (classes[i - 1] != X86_64_X87_CLASS))
2247 static bool warned;
2249 /* The first one should never be X86_64_X87UP_CLASS. */
2250 gcc_assert (i != 0);
2251 if (!warned && warn_psabi)
2253 warned = true;
2254 inform (input_location,
2255 "the ABI of passing union with %<long double%>"
2256 " has changed in GCC 4.4");
2258 return 0;
2261 return words;
2264 /* Compute alignment needed. We align all types to natural boundaries with
2265 the exception of XFmode, which is aligned to 64 bits. */
2266 if (mode != VOIDmode && mode != BLKmode)
2268 int mode_alignment = GET_MODE_BITSIZE (mode);
2270 if (mode == XFmode)
2271 mode_alignment = 128;
2272 else if (mode == XCmode)
2273 mode_alignment = 256;
2274 if (COMPLEX_MODE_P (mode))
2275 mode_alignment /= 2;
2276 /* Misaligned fields are always returned in memory. */
2277 if (bit_offset % mode_alignment)
2278 return 0;
2281 /* for V1xx modes, just use the base mode */
2282 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
2283 && GET_MODE_UNIT_SIZE (mode) == bytes)
2284 mode = GET_MODE_INNER (mode);
2286 /* Classification of atomic types. */
2287 switch (mode)
2289 case E_SDmode:
2290 case E_DDmode:
2291 classes[0] = X86_64_SSE_CLASS;
2292 return 1;
2293 case E_TDmode:
2294 classes[0] = X86_64_SSE_CLASS;
2295 classes[1] = X86_64_SSEUP_CLASS;
2296 return 2;
2297 case E_DImode:
2298 case E_SImode:
2299 case E_HImode:
2300 case E_QImode:
2301 case E_CSImode:
2302 case E_CHImode:
2303 case E_CQImode:
2305 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
2307 /* Analyze last 128 bits only. */
2308 size = (size - 1) & 0x7f;
2310 if (size < 32)
2312 classes[0] = X86_64_INTEGERSI_CLASS;
2313 return 1;
2315 else if (size < 64)
2317 classes[0] = X86_64_INTEGER_CLASS;
2318 return 1;
2320 else if (size < 64+32)
2322 classes[0] = X86_64_INTEGER_CLASS;
2323 classes[1] = X86_64_INTEGERSI_CLASS;
2324 return 2;
2326 else if (size < 64+64)
2328 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2329 return 2;
2331 else
2332 gcc_unreachable ();
2334 case E_CDImode:
2335 case E_TImode:
2336 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2337 return 2;
2338 case E_COImode:
2339 case E_OImode:
2340 /* OImode shouldn't be used directly. */
2341 gcc_unreachable ();
2342 case E_CTImode:
2343 return 0;
2344 case E_SFmode:
2345 if (!(bit_offset % 64))
2346 classes[0] = X86_64_SSESF_CLASS;
2347 else
2348 classes[0] = X86_64_SSE_CLASS;
2349 return 1;
2350 case E_DFmode:
2351 classes[0] = X86_64_SSEDF_CLASS;
2352 return 1;
2353 case E_XFmode:
2354 classes[0] = X86_64_X87_CLASS;
2355 classes[1] = X86_64_X87UP_CLASS;
2356 return 2;
2357 case E_TFmode:
2358 classes[0] = X86_64_SSE_CLASS;
2359 classes[1] = X86_64_SSEUP_CLASS;
2360 return 2;
2361 case E_SCmode:
2362 classes[0] = X86_64_SSE_CLASS;
2363 if (!(bit_offset % 64))
2364 return 1;
2365 else
2367 static bool warned;
2369 if (!warned && warn_psabi)
2371 warned = true;
2372 inform (input_location,
2373 "the ABI of passing structure with %<complex float%>"
2374 " member has changed in GCC 4.4");
2376 classes[1] = X86_64_SSESF_CLASS;
2377 return 2;
2379 case E_DCmode:
2380 classes[0] = X86_64_SSEDF_CLASS;
2381 classes[1] = X86_64_SSEDF_CLASS;
2382 return 2;
2383 case E_XCmode:
2384 classes[0] = X86_64_COMPLEX_X87_CLASS;
2385 return 1;
2386 case E_TCmode:
2387 /* This mode is larger than 16 bytes. */
2388 return 0;
2389 case E_V8SFmode:
2390 case E_V8SImode:
2391 case E_V32QImode:
2392 case E_V16HImode:
2393 case E_V4DFmode:
2394 case E_V4DImode:
2395 classes[0] = X86_64_SSE_CLASS;
2396 classes[1] = X86_64_SSEUP_CLASS;
2397 classes[2] = X86_64_SSEUP_CLASS;
2398 classes[3] = X86_64_SSEUP_CLASS;
2399 return 4;
2400 case E_V8DFmode:
2401 case E_V16SFmode:
2402 case E_V8DImode:
2403 case E_V16SImode:
2404 case E_V32HImode:
2405 case E_V64QImode:
2406 classes[0] = X86_64_SSE_CLASS;
2407 classes[1] = X86_64_SSEUP_CLASS;
2408 classes[2] = X86_64_SSEUP_CLASS;
2409 classes[3] = X86_64_SSEUP_CLASS;
2410 classes[4] = X86_64_SSEUP_CLASS;
2411 classes[5] = X86_64_SSEUP_CLASS;
2412 classes[6] = X86_64_SSEUP_CLASS;
2413 classes[7] = X86_64_SSEUP_CLASS;
2414 return 8;
2415 case E_V4SFmode:
2416 case E_V4SImode:
2417 case E_V16QImode:
2418 case E_V8HImode:
2419 case E_V2DFmode:
2420 case E_V2DImode:
2421 classes[0] = X86_64_SSE_CLASS;
2422 classes[1] = X86_64_SSEUP_CLASS;
2423 return 2;
2424 case E_V1TImode:
2425 case E_V1DImode:
2426 case E_V2SFmode:
2427 case E_V2SImode:
2428 case E_V4HImode:
2429 case E_V8QImode:
2430 classes[0] = X86_64_SSE_CLASS;
2431 return 1;
2432 case E_BLKmode:
2433 case E_VOIDmode:
2434 return 0;
2435 default:
2436 gcc_assert (VECTOR_MODE_P (mode));
2438 if (bytes > 16)
2439 return 0;
2441 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2443 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2444 classes[0] = X86_64_INTEGERSI_CLASS;
2445 else
2446 classes[0] = X86_64_INTEGER_CLASS;
2447 classes[1] = X86_64_INTEGER_CLASS;
2448 return 1 + (bytes > 8);
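/* An illustrative example (the structs are invented): for

       struct s { double x; int i; };

   this returns 2 with classes[0] = X86_64_SSEDF_CLASS and
   classes[1] = X86_64_INTEGER_CLASS, so the two eightbytes are passed
   in an SSE register and an integer register respectively.  A 24-byte
   struct of three doubles instead returns 0, because words > 2 and
   classes[0] is not X86_64_SSE_CLASS, i.e. it goes to memory.  */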
2452 /* Examine the argument and set the number of registers required in each
2453 class. Return true iff the parameter should be passed in memory. */
2455 static bool
2456 examine_argument (machine_mode mode, const_tree type, int in_return,
2457 int *int_nregs, int *sse_nregs)
2459 enum x86_64_reg_class regclass[MAX_CLASSES];
2460 int n = classify_argument (mode, type, regclass, 0);
2462 *int_nregs = 0;
2463 *sse_nregs = 0;
2465 if (!n)
2466 return true;
2467 for (n--; n >= 0; n--)
2468 switch (regclass[n])
2470 case X86_64_INTEGER_CLASS:
2471 case X86_64_INTEGERSI_CLASS:
2472 (*int_nregs)++;
2473 break;
2474 case X86_64_SSE_CLASS:
2475 case X86_64_SSESF_CLASS:
2476 case X86_64_SSEDF_CLASS:
2477 (*sse_nregs)++;
2478 break;
2479 case X86_64_NO_CLASS:
2480 case X86_64_SSEUP_CLASS:
2481 break;
2482 case X86_64_X87_CLASS:
2483 case X86_64_X87UP_CLASS:
2484 case X86_64_COMPLEX_X87_CLASS:
2485 if (!in_return)
2486 return true;
2487 break;
2488 case X86_64_MEMORY_CLASS:
2489 gcc_unreachable ();
2492 return false;
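/* An illustrative example (the types are invented): for
   struct s { double x; int i; } this sets *int_nregs = 1 and
   *sse_nregs = 1 and returns false, whereas a type containing a
   long double classifies as X87/X87UP and makes the function return
   true (pass in memory) unless IN_RETURN is nonzero.  */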
2495 /* Construct container for the argument used by GCC interface. See
2496 FUNCTION_ARG for the detailed description. */
2498 static rtx
2499 construct_container (machine_mode mode, machine_mode orig_mode,
2500 const_tree type, int in_return, int nintregs, int nsseregs,
2501 const int *intreg, int sse_regno)
2503 /* The following variables hold the static issued_error state. */
2504 static bool issued_sse_arg_error;
2505 static bool issued_sse_ret_error;
2506 static bool issued_x87_ret_error;
2508 machine_mode tmpmode;
2509 int bytes
2510 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2511 enum x86_64_reg_class regclass[MAX_CLASSES];
2512 int n;
2513 int i;
2514 int nexps = 0;
2515 int needed_sseregs, needed_intregs;
2516 rtx exp[MAX_CLASSES];
2517 rtx ret;
2519 n = classify_argument (mode, type, regclass, 0);
2520 if (!n)
2521 return NULL;
2522 if (examine_argument (mode, type, in_return, &needed_intregs,
2523 &needed_sseregs))
2524 return NULL;
2525 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2526 return NULL;
2528 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2529 some less clueful developer tries to use floating-point anyway. */
2530 if (needed_sseregs && !TARGET_SSE)
2532 if (in_return)
2534 if (!issued_sse_ret_error)
2536 error ("SSE register return with SSE disabled");
2537 issued_sse_ret_error = true;
2540 else if (!issued_sse_arg_error)
2542 error ("SSE register argument with SSE disabled");
2543 issued_sse_arg_error = true;
2545 return NULL;
2548 /* Likewise, error if the ABI requires us to return values in the
2549 x87 registers and the user specified -mno-80387. */
2550 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
2551 for (i = 0; i < n; i++)
2552 if (regclass[i] == X86_64_X87_CLASS
2553 || regclass[i] == X86_64_X87UP_CLASS
2554 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
2556 if (!issued_x87_ret_error)
2558 error ("x87 register return with x87 disabled");
2559 issued_x87_ret_error = true;
2561 return NULL;
2564 /* First construct simple cases. Avoid SCmode, since we want to use
2565 single register to pass this type. */
2566 if (n == 1 && mode != SCmode)
2567 switch (regclass[0])
2569 case X86_64_INTEGER_CLASS:
2570 case X86_64_INTEGERSI_CLASS:
2571 return gen_rtx_REG (mode, intreg[0]);
2572 case X86_64_SSE_CLASS:
2573 case X86_64_SSESF_CLASS:
2574 case X86_64_SSEDF_CLASS:
2575 if (mode != BLKmode)
2576 return gen_reg_or_parallel (mode, orig_mode,
2577 GET_SSE_REGNO (sse_regno));
2578 break;
2579 case X86_64_X87_CLASS:
2580 case X86_64_COMPLEX_X87_CLASS:
2581 return gen_rtx_REG (mode, FIRST_STACK_REG);
2582 case X86_64_NO_CLASS:
2583 /* Zero sized array, struct or class. */
2584 return NULL;
2585 default:
2586 gcc_unreachable ();
2588 if (n == 2
2589 && regclass[0] == X86_64_SSE_CLASS
2590 && regclass[1] == X86_64_SSEUP_CLASS
2591 && mode != BLKmode)
2592 return gen_reg_or_parallel (mode, orig_mode,
2593 GET_SSE_REGNO (sse_regno));
2594 if (n == 4
2595 && regclass[0] == X86_64_SSE_CLASS
2596 && regclass[1] == X86_64_SSEUP_CLASS
2597 && regclass[2] == X86_64_SSEUP_CLASS
2598 && regclass[3] == X86_64_SSEUP_CLASS
2599 && mode != BLKmode)
2600 return gen_reg_or_parallel (mode, orig_mode,
2601 GET_SSE_REGNO (sse_regno));
2602 if (n == 8
2603 && regclass[0] == X86_64_SSE_CLASS
2604 && regclass[1] == X86_64_SSEUP_CLASS
2605 && regclass[2] == X86_64_SSEUP_CLASS
2606 && regclass[3] == X86_64_SSEUP_CLASS
2607 && regclass[4] == X86_64_SSEUP_CLASS
2608 && regclass[5] == X86_64_SSEUP_CLASS
2609 && regclass[6] == X86_64_SSEUP_CLASS
2610 && regclass[7] == X86_64_SSEUP_CLASS
2611 && mode != BLKmode)
2612 return gen_reg_or_parallel (mode, orig_mode,
2613 GET_SSE_REGNO (sse_regno));
2614 if (n == 2
2615 && regclass[0] == X86_64_X87_CLASS
2616 && regclass[1] == X86_64_X87UP_CLASS)
2617 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2619 if (n == 2
2620 && regclass[0] == X86_64_INTEGER_CLASS
2621 && regclass[1] == X86_64_INTEGER_CLASS
2622 && (mode == CDImode || mode == TImode || mode == BLKmode)
2623 && intreg[0] + 1 == intreg[1])
2625 if (mode == BLKmode)
2627 /* Use TImode for BLKmode values in 2 integer registers. */
2628 exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
2629 gen_rtx_REG (TImode, intreg[0]),
2630 GEN_INT (0));
2631 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
2632 XVECEXP (ret, 0, 0) = exp[0];
2633 return ret;
2635 else
2636 return gen_rtx_REG (mode, intreg[0]);
2639 /* Otherwise figure out the entries of the PARALLEL. */
2640 for (i = 0; i < n; i++)
2642 int pos;
2644 switch (regclass[i])
2646 case X86_64_NO_CLASS:
2647 break;
2648 case X86_64_INTEGER_CLASS:
2649 case X86_64_INTEGERSI_CLASS:
2650 /* Merge TImodes on aligned occasions here too. */
2651 if (i * 8 + 8 > bytes)
2653 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
2654 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
2655 /* We've requested 24 bytes we
2656 don't have mode for. Use DImode. */
2657 tmpmode = DImode;
2659 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
2660 tmpmode = SImode;
2661 else
2662 tmpmode = DImode;
2663 exp [nexps++]
2664 = gen_rtx_EXPR_LIST (VOIDmode,
2665 gen_rtx_REG (tmpmode, *intreg),
2666 GEN_INT (i*8));
2667 intreg++;
2668 break;
2669 case X86_64_SSESF_CLASS:
2670 exp [nexps++]
2671 = gen_rtx_EXPR_LIST (VOIDmode,
2672 gen_rtx_REG (SFmode,
2673 GET_SSE_REGNO (sse_regno)),
2674 GEN_INT (i*8));
2675 sse_regno++;
2676 break;
2677 case X86_64_SSEDF_CLASS:
2678 exp [nexps++]
2679 = gen_rtx_EXPR_LIST (VOIDmode,
2680 gen_rtx_REG (DFmode,
2681 GET_SSE_REGNO (sse_regno)),
2682 GEN_INT (i*8));
2683 sse_regno++;
2684 break;
2685 case X86_64_SSE_CLASS:
2686 pos = i;
2687 switch (n)
2689 case 1:
2690 tmpmode = DImode;
2691 break;
2692 case 2:
2693 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
2695 tmpmode = TImode;
2696 i++;
2698 else
2699 tmpmode = DImode;
2700 break;
2701 case 4:
2702 gcc_assert (i == 0
2703 && regclass[1] == X86_64_SSEUP_CLASS
2704 && regclass[2] == X86_64_SSEUP_CLASS
2705 && regclass[3] == X86_64_SSEUP_CLASS);
2706 tmpmode = OImode;
2707 i += 3;
2708 break;
2709 case 8:
2710 gcc_assert (i == 0
2711 && regclass[1] == X86_64_SSEUP_CLASS
2712 && regclass[2] == X86_64_SSEUP_CLASS
2713 && regclass[3] == X86_64_SSEUP_CLASS
2714 && regclass[4] == X86_64_SSEUP_CLASS
2715 && regclass[5] == X86_64_SSEUP_CLASS
2716 && regclass[6] == X86_64_SSEUP_CLASS
2717 && regclass[7] == X86_64_SSEUP_CLASS);
2718 tmpmode = XImode;
2719 i += 7;
2720 break;
2721 default:
2722 gcc_unreachable ();
2724 exp [nexps++]
2725 = gen_rtx_EXPR_LIST (VOIDmode,
2726 gen_rtx_REG (tmpmode,
2727 GET_SSE_REGNO (sse_regno)),
2728 GEN_INT (pos*8));
2729 sse_regno++;
2730 break;
2731 default:
2732 gcc_unreachable ();
2736 /* Empty aligned struct, union or class. */
2737 if (nexps == 0)
2738 return NULL;
2740 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2741 for (i = 0; i < nexps; i++)
2742 XVECEXP (ret, 0, i) = exp [i];
2743 return ret;
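/* An illustrative sketch (the struct is invented and the RTL is only
   approximate): for struct s { double x; int i; } passed as the first
   argument, the loop above builds roughly

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di) (const_int 8))])

   i.e. the first eightbyte in the next free SSE register at byte
   offset 0 and the second in the next free integer register at byte
   offset 8.  */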
2746 /* Update the data in CUM to advance over an argument of mode MODE
2747 and data type TYPE. (TYPE is null for libcalls where that information
2748 may not be available.)
2750 Return the number of integer registers advanced over. */
2752 static int
2753 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2754 const_tree type, HOST_WIDE_INT bytes,
2755 HOST_WIDE_INT words)
2757 int res = 0;
2758 bool error_p = false;
2760 if (TARGET_IAMCU)
2762 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2763 bytes in registers. */
2764 if (!VECTOR_MODE_P (mode) && bytes <= 8)
2765 goto pass_in_reg;
2766 return res;
2769 switch (mode)
2771 default:
2772 break;
2774 case E_BLKmode:
2775 if (bytes < 0)
2776 break;
2777 /* FALLTHRU */
2779 case E_DImode:
2780 case E_SImode:
2781 case E_HImode:
2782 case E_QImode:
2783 pass_in_reg:
2784 cum->words += words;
2785 cum->nregs -= words;
2786 cum->regno += words;
2787 if (cum->nregs >= 0)
2788 res = words;
2789 if (cum->nregs <= 0)
2791 cum->nregs = 0;
2792 cfun->machine->arg_reg_available = false;
2793 cum->regno = 0;
2795 break;
2797 case E_OImode:
2798 /* OImode shouldn't be used directly. */
2799 gcc_unreachable ();
2801 case E_DFmode:
2802 if (cum->float_in_sse == -1)
2803 error_p = true;
2804 if (cum->float_in_sse < 2)
2805 break;
2806 /* FALLTHRU */
2807 case E_SFmode:
2808 if (cum->float_in_sse == -1)
2809 error_p = true;
2810 if (cum->float_in_sse < 1)
2811 break;
2812 /* FALLTHRU */
2814 case E_V8SFmode:
2815 case E_V8SImode:
2816 case E_V64QImode:
2817 case E_V32HImode:
2818 case E_V16SImode:
2819 case E_V8DImode:
2820 case E_V16SFmode:
2821 case E_V8DFmode:
2822 case E_V32QImode:
2823 case E_V16HImode:
2824 case E_V4DFmode:
2825 case E_V4DImode:
2826 case E_TImode:
2827 case E_V16QImode:
2828 case E_V8HImode:
2829 case E_V4SImode:
2830 case E_V2DImode:
2831 case E_V4SFmode:
2832 case E_V2DFmode:
2833 if (!type || !AGGREGATE_TYPE_P (type))
2835 cum->sse_words += words;
2836 cum->sse_nregs -= 1;
2837 cum->sse_regno += 1;
2838 if (cum->sse_nregs <= 0)
2840 cum->sse_nregs = 0;
2841 cum->sse_regno = 0;
2844 break;
2846 case E_V8QImode:
2847 case E_V4HImode:
2848 case E_V2SImode:
2849 case E_V2SFmode:
2850 case E_V1TImode:
2851 case E_V1DImode:
2852 if (!type || !AGGREGATE_TYPE_P (type))
2854 cum->mmx_words += words;
2855 cum->mmx_nregs -= 1;
2856 cum->mmx_regno += 1;
2857 if (cum->mmx_nregs <= 0)
2859 cum->mmx_nregs = 0;
2860 cum->mmx_regno = 0;
2863 break;
2865 if (error_p)
2867 cum->float_in_sse = 0;
2868 error ("calling %qD with SSE calling convention without "
2869 "SSE/SSE2 enabled", cum->decl);
2870 sorry ("this is a GCC bug that can be worked around by adding "
2871 "attribute used to function called");
2874 return res;
2877 static int
2878 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
2879 const_tree type, HOST_WIDE_INT words, bool named)
2881 int int_nregs, sse_nregs;
2883 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
2884 if (!named && (VALID_AVX512F_REG_MODE (mode)
2885 || VALID_AVX256_REG_MODE (mode)))
2886 return 0;
2888 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
2889 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2891 cum->nregs -= int_nregs;
2892 cum->sse_nregs -= sse_nregs;
2893 cum->regno += int_nregs;
2894 cum->sse_regno += sse_nregs;
2895 return int_nregs;
2897 else
2899 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
2900 cum->words = ROUND_UP (cum->words, align);
2901 cum->words += words;
2902 return 0;
2906 static int
2907 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
2908 HOST_WIDE_INT words)
2910 /* Otherwise, this should be passed indirectly. */
2911 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
2913 cum->words += words;
2914 if (cum->nregs > 0)
2916 cum->nregs -= 1;
2917 cum->regno += 1;
2918 return 1;
2920 return 0;
2923 /* Update the data in CUM to advance over argument ARG. */
2925 static void
2926 ix86_function_arg_advance (cumulative_args_t cum_v,
2927 const function_arg_info &arg)
2929 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2930 machine_mode mode = arg.mode;
2931 HOST_WIDE_INT bytes, words;
2932 int nregs;
2934 /* The argument of interrupt handler is a special case and is
2935 handled in ix86_function_arg. */
2936 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
2937 return;
2939 bytes = arg.promoted_size_in_bytes ();
2940 words = CEIL (bytes, UNITS_PER_WORD);
2942 if (arg.type)
2943 mode = type_natural_mode (arg.type, NULL, false);
2945 if (TARGET_64BIT)
2947 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
2949 if (call_abi == MS_ABI)
2950 nregs = function_arg_advance_ms_64 (cum, bytes, words);
2951 else
2952 nregs = function_arg_advance_64 (cum, mode, arg.type, words,
2953 arg.named);
2955 else
2956 nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
2958 if (!nregs)
2960 /* Track if there are outgoing arguments on stack. */
2961 if (cum->caller)
2962 cfun->machine->outgoing_args_on_stack = true;
2966 /* Define where to put the arguments to a function.
2967 Value is zero to push the argument on the stack,
2968 or a hard register in which to store the argument.
2970 MODE is the argument's machine mode.
2971 TYPE is the data type of the argument (as a tree).
2972 This is null for libcalls where that information may
2973 not be available.
2974 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2975 the preceding args and about the function being called.
2976 NAMED is nonzero if this argument is a named parameter
2977 (otherwise it is an extra parameter matching an ellipsis). */
2979 static rtx
2980 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2981 machine_mode orig_mode, const_tree type,
2982 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
2984 bool error_p = false;
2986 /* Avoid the AL settings for the Unix64 ABI. */
2987 if (mode == VOIDmode)
2988 return constm1_rtx;
2990 if (TARGET_IAMCU)
2992 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2993 bytes in registers. */
2994 if (!VECTOR_MODE_P (mode) && bytes <= 8)
2995 goto pass_in_reg;
2996 return NULL_RTX;
2999 switch (mode)
3001 default:
3002 break;
3004 case E_BLKmode:
3005 if (bytes < 0)
3006 break;
3007 /* FALLTHRU */
3008 case E_DImode:
3009 case E_SImode:
3010 case E_HImode:
3011 case E_QImode:
3012 pass_in_reg:
3013 if (words <= cum->nregs)
3015 int regno = cum->regno;
3017 /* Fastcall allocates the first two DWORD (SImode) or
3018 smaller arguments to ECX and EDX if the argument isn't
3019 an aggregate type. */
3020 if (cum->fastcall)
3022 if (mode == BLKmode
3023 || mode == DImode
3024 || (type && AGGREGATE_TYPE_P (type)))
3025 break;
3027 /* ECX not EAX is the first allocated register. */
3028 if (regno == AX_REG)
3029 regno = CX_REG;
3031 return gen_rtx_REG (mode, regno);
3033 break;
3035 case E_DFmode:
3036 if (cum->float_in_sse == -1)
3037 error_p = true;
3038 if (cum->float_in_sse < 2)
3039 break;
3040 /* FALLTHRU */
3041 case E_SFmode:
3042 if (cum->float_in_sse == -1)
3043 error_p = true;
3044 if (cum->float_in_sse < 1)
3045 break;
3046 /* FALLTHRU */
3047 case E_TImode:
3048 /* In 32bit, we pass TImode in xmm registers. */
3049 case E_V16QImode:
3050 case E_V8HImode:
3051 case E_V4SImode:
3052 case E_V2DImode:
3053 case E_V4SFmode:
3054 case E_V2DFmode:
3055 if (!type || !AGGREGATE_TYPE_P (type))
3057 if (cum->sse_nregs)
3058 return gen_reg_or_parallel (mode, orig_mode,
3059 cum->sse_regno + FIRST_SSE_REG);
3061 break;
3063 case E_OImode:
3064 case E_XImode:
3065 /* OImode and XImode shouldn't be used directly. */
3066 gcc_unreachable ();
3068 case E_V64QImode:
3069 case E_V32HImode:
3070 case E_V16SImode:
3071 case E_V8DImode:
3072 case E_V16SFmode:
3073 case E_V8DFmode:
3074 case E_V8SFmode:
3075 case E_V8SImode:
3076 case E_V32QImode:
3077 case E_V16HImode:
3078 case E_V4DFmode:
3079 case E_V4DImode:
3080 if (!type || !AGGREGATE_TYPE_P (type))
3082 if (cum->sse_nregs)
3083 return gen_reg_or_parallel (mode, orig_mode,
3084 cum->sse_regno + FIRST_SSE_REG);
3086 break;
3088 case E_V8QImode:
3089 case E_V4HImode:
3090 case E_V2SImode:
3091 case E_V2SFmode:
3092 case E_V1TImode:
3093 case E_V1DImode:
3094 if (!type || !AGGREGATE_TYPE_P (type))
3096 if (cum->mmx_nregs)
3097 return gen_reg_or_parallel (mode, orig_mode,
3098 cum->mmx_regno + FIRST_MMX_REG);
3100 break;
3102 if (error_p)
3104 cum->float_in_sse = 0;
3105 error ("calling %qD with SSE calling convention without "
3106 "SSE/SSE2 enabled", cum->decl);
3107 sorry ("this is a GCC bug that can be worked around by adding "
3108 "attribute used to function called");
3111 return NULL_RTX;
3114 static rtx
3115 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3116 machine_mode orig_mode, const_tree type, bool named)
3118 /* Handle a hidden AL argument containing number of registers
3119 for varargs x86-64 functions. */
3120 if (mode == VOIDmode)
3121 return GEN_INT (cum->maybe_vaarg
3122 ? (cum->sse_nregs < 0
3123 ? X86_64_SSE_REGPARM_MAX
3124 : cum->sse_regno)
3125 : -1);
3127 switch (mode)
3129 default:
3130 break;
3132 case E_V8SFmode:
3133 case E_V8SImode:
3134 case E_V32QImode:
3135 case E_V16HImode:
3136 case E_V4DFmode:
3137 case E_V4DImode:
3138 case E_V16SFmode:
3139 case E_V16SImode:
3140 case E_V64QImode:
3141 case E_V32HImode:
3142 case E_V8DFmode:
3143 case E_V8DImode:
3144 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
3145 if (!named)
3146 return NULL;
3147 break;
3150 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3151 cum->sse_nregs,
3152 &x86_64_int_parameter_registers [cum->regno],
3153 cum->sse_regno);
3156 static rtx
3157 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3158 machine_mode orig_mode, bool named, const_tree type,
3159 HOST_WIDE_INT bytes)
3161 unsigned int regno;
3163 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
3164 We use the value -2 to specify that the current function call is MS_ABI. */
3165 if (mode == VOIDmode)
3166 return GEN_INT (-2);
3168 /* If we've run out of registers, it goes on the stack. */
3169 if (cum->nregs == 0)
3170 return NULL_RTX;
3172 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3174 /* Only floating point modes are passed in anything but integer regs. */
3175 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
3177 if (named)
3179 if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
3180 regno = cum->regno + FIRST_SSE_REG;
3182 else
3184 rtx t1, t2;
3186 /* Unnamed floating parameters are passed in both the
3187 SSE and integer registers. */
3188 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3189 t2 = gen_rtx_REG (mode, regno);
3190 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3191 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3192 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3195 /* Handle aggregate types passed in registers. */
3196 if (orig_mode == BLKmode)
3198 if (bytes > 0 && bytes <= 8)
3199 mode = (bytes > 4 ? DImode : SImode);
3200 if (mode == BLKmode)
3201 mode = DImode;
3204 return gen_reg_or_parallel (mode, orig_mode, regno);
3207 /* Return where to put the arguments to a function.
3208 Return zero to push the argument on the stack, or a hard register in which to store the argument.
3210 ARG describes the argument while CUM gives information about the
3211 preceding args and about the function being called. */
3213 static rtx
3214 ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
3216 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3217 machine_mode mode = arg.mode;
3218 HOST_WIDE_INT bytes, words;
3219 rtx reg;
3221 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3223 gcc_assert (arg.type != NULL_TREE);
3224 if (POINTER_TYPE_P (arg.type))
3226 /* This is the pointer argument. */
3227 gcc_assert (TYPE_MODE (arg.type) == Pmode);
3228 /* It is at -WORD(AP) in the current frame in interrupt and
3229 exception handlers. */
3230 reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
3232 else
3234 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
3235 && TREE_CODE (arg.type) == INTEGER_TYPE
3236 && TYPE_MODE (arg.type) == word_mode);
3237 /* The error code is the word-mode integer argument at
3238 -2 * WORD(AP) in the current frame of the exception
3239 handler. */
3240 reg = gen_rtx_MEM (word_mode,
3241 plus_constant (Pmode,
3242 arg_pointer_rtx,
3243 -2 * UNITS_PER_WORD));
3245 return reg;
3248 bytes = arg.promoted_size_in_bytes ();
3249 words = CEIL (bytes, UNITS_PER_WORD);
3251 /* To simplify the code below, represent vector types with a vector mode
3252 even if MMX/SSE are not active. */
3253 if (arg.type && TREE_CODE (arg.type) == VECTOR_TYPE)
3254 mode = type_natural_mode (arg.type, cum, false);
3256 if (TARGET_64BIT)
3258 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3260 if (call_abi == MS_ABI)
3261 reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
3262 arg.type, bytes);
3263 else
3264 reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
3266 else
3267 reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);
3269 /* Track if there are outgoing arguments on stack. */
3270 if (reg == NULL_RTX && cum->caller)
3271 cfun->machine->outgoing_args_on_stack = true;
3273 return reg;
3276 /* A C expression that indicates when an argument must be passed by
3277 reference. If nonzero for an argument, a copy of that argument is
3278 made in memory and a pointer to the argument is passed instead of
3279 the argument itself. The pointer is passed in whatever way is
3280 appropriate for passing a pointer to that type. */
3282 static bool
3283 ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
3285 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3287 if (TARGET_64BIT)
3289 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3291 /* See Windows x64 Software Convention. */
3292 if (call_abi == MS_ABI)
3294 HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
3296 if (tree type = arg.type)
3298 /* Arrays are passed by reference. */
3299 if (TREE_CODE (type) == ARRAY_TYPE)
3300 return true;
3302 if (RECORD_OR_UNION_TYPE_P (type))
3304 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3305 are passed by reference. */
3306 msize = int_size_in_bytes (type);
3310 /* __m128 is passed by reference. */
3311 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
3313 else if (arg.type && int_size_in_bytes (arg.type) == -1)
3314 return true;
3317 return false;
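/* An illustrative example (the struct names are invented): under the
   Microsoft x64 convention handled above

       struct s3 { char c[3]; };    size 3  -> passed by reference
       struct s8 { double d; };     size 8  -> passed by value
       __m128                       size 16 -> passed by reference

   whereas on the SysV x86-64 side this hook only forces variable-sized
   types (int_size_in_bytes of -1) to be passed by reference.  */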
3320 /* Return true when TYPE should be 128bit aligned for 32bit argument
3321 passing ABI. XXX: This function is obsolete and is only used for
3322 checking psABI compatibility with previous versions of GCC. */
3324 static bool
3325 ix86_compat_aligned_value_p (const_tree type)
3327 machine_mode mode = TYPE_MODE (type);
3328 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
3329 || mode == TDmode
3330 || mode == TFmode
3331 || mode == TCmode)
3332 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3333 return true;
3334 if (TYPE_ALIGN (type) < 128)
3335 return false;
3337 if (AGGREGATE_TYPE_P (type))
3339 /* Walk the aggregates recursively. */
3340 switch (TREE_CODE (type))
3342 case RECORD_TYPE:
3343 case UNION_TYPE:
3344 case QUAL_UNION_TYPE:
3346 tree field;
3348 /* Walk all the structure fields. */
3349 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3351 if (TREE_CODE (field) == FIELD_DECL
3352 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
3353 return true;
3355 break;
3358 case ARRAY_TYPE:
3359 /* Just for use if some language passes arrays by value. */
3360 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
3361 return true;
3362 break;
3364 default:
3365 gcc_unreachable ();
3368 return false;
3371 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3372 XXX: This function is obsolete and is only used for checking psABI
3373 compatibility with previous versions of GCC. */
3375 static unsigned int
3376 ix86_compat_function_arg_boundary (machine_mode mode,
3377 const_tree type, unsigned int align)
3379 /* In 32bit, only _Decimal128 and __float128 are aligned to their
3380 natural boundaries. */
3381 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
3383 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3384 make an exception for SSE modes since these require 128bit
3385 alignment.
3387 The handling here differs from field_alignment. ICC aligns MMX
3388 arguments to 4 byte boundaries, while structure fields are aligned
3389 to 8 byte boundaries. */
3390 if (!type)
3392 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
3393 align = PARM_BOUNDARY;
3395 else
3397 if (!ix86_compat_aligned_value_p (type))
3398 align = PARM_BOUNDARY;
3401 if (align > BIGGEST_ALIGNMENT)
3402 align = BIGGEST_ALIGNMENT;
3403 return align;
3406 /* Return true when TYPE should be 128bit aligned for 32bit argument
3407 passing ABI. */
3409 static bool
3410 ix86_contains_aligned_value_p (const_tree type)
3412 machine_mode mode = TYPE_MODE (type);
3414 if (mode == XFmode || mode == XCmode)
3415 return false;
3417 if (TYPE_ALIGN (type) < 128)
3418 return false;
3420 if (AGGREGATE_TYPE_P (type))
3422 /* Walk the aggregates recursively. */
3423 switch (TREE_CODE (type))
3425 case RECORD_TYPE:
3426 case UNION_TYPE:
3427 case QUAL_UNION_TYPE:
3429 tree field;
3431 /* Walk all the structure fields. */
3432 for (field = TYPE_FIELDS (type);
3433 field;
3434 field = DECL_CHAIN (field))
3436 if (TREE_CODE (field) == FIELD_DECL
3437 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
3438 return true;
3440 break;
3443 case ARRAY_TYPE:
3444 /* Just for use if some language passes arrays by value. */
3445 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
3446 return true;
3447 break;
3449 default:
3450 gcc_unreachable ();
3453 else
3454 return TYPE_ALIGN (type) >= 128;
3456 return false;
3459 /* Gives the alignment boundary, in bits, of an argument with the
3460 specified mode and type. */
3462 static unsigned int
3463 ix86_function_arg_boundary (machine_mode mode, const_tree type)
3465 unsigned int align;
3466 if (type)
3468 /* Since the main variant type is used for the call, convert the
3469 type to its main variant. */
3470 type = TYPE_MAIN_VARIANT (type);
3471 align = TYPE_ALIGN (type);
3472 if (TYPE_EMPTY_P (type))
3473 return PARM_BOUNDARY;
3475 else
3476 align = GET_MODE_ALIGNMENT (mode);
3477 if (align < PARM_BOUNDARY)
3478 align = PARM_BOUNDARY;
3479 else
3481 static bool warned;
3482 unsigned int saved_align = align;
3484 if (!TARGET_64BIT)
3486 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
3487 if (!type)
3489 if (mode == XFmode || mode == XCmode)
3490 align = PARM_BOUNDARY;
3492 else if (!ix86_contains_aligned_value_p (type))
3493 align = PARM_BOUNDARY;
3495 if (align < 128)
3496 align = PARM_BOUNDARY;
3499 if (warn_psabi
3500 && !warned
3501 && align != ix86_compat_function_arg_boundary (mode, type,
3502 saved_align))
3504 warned = true;
3505 inform (input_location,
3506 "the ABI for passing parameters with %d-byte"
3507 " alignment has changed in GCC 4.6",
3508 align / BITS_PER_UNIT);
3512 return align;
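/* An illustrative example (not exhaustive): on a 32-bit target a double
   argument is dropped to PARM_BOUNDARY (32 bits) even though the type
   itself is 64-bit aligned, while an __m128 argument (TYPE_ALIGN of
   128) keeps its 128-bit boundary; whenever the result differs from
   the pre-GCC 4.6 rule, the -Wpsabi note above is emitted once per
   translation unit.  */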
3515 /* Return true if N is a possible register number of function value. */
3517 static bool
3518 ix86_function_value_regno_p (const unsigned int regno)
3520 switch (regno)
3522 case AX_REG:
3523 return true;
3524 case DX_REG:
3525 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
3526 case DI_REG:
3527 case SI_REG:
3528 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
3530 /* Complex values are returned in %st(0)/%st(1) pair. */
3531 case ST0_REG:
3532 case ST1_REG:
3533 /* TODO: The function should depend on current function ABI but
3534 builtins.c would need updating then. Therefore we use the
3535 default ABI. */
3536 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3537 return false;
3538 return TARGET_FLOAT_RETURNS_IN_80387;
3540 /* Complex values are returned in %xmm0/%xmm1 pair. */
3541 case XMM0_REG:
3542 case XMM1_REG:
3543 return TARGET_SSE;
3545 case MM0_REG:
3546 if (TARGET_MACHO || TARGET_64BIT)
3547 return false;
3548 return TARGET_MMX;
3551 return false;
3554 /* Define how to find the value returned by a function.
3555 VALTYPE is the data type of the value (as a tree).
3556 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3557 otherwise, FUNC is 0. */
3559 static rtx
3560 function_value_32 (machine_mode orig_mode, machine_mode mode,
3561 const_tree fntype, const_tree fn)
3563 unsigned int regno;
3565 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3566 we normally prevent this case when mmx is not available. However
3567 some ABIs may require the result to be returned like DImode. */
3568 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3569 regno = FIRST_MMX_REG;
3571 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3572 we prevent this case when sse is not available. However some ABIs
3573 may require the result to be returned like integer TImode. */
3574 else if (mode == TImode
3575 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3576 regno = FIRST_SSE_REG;
3578 /* 32-byte vector modes in %ymm0. */
3579 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
3580 regno = FIRST_SSE_REG;
3582 /* 64-byte vector modes in %zmm0. */
3583 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
3584 regno = FIRST_SSE_REG;
3586 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
3587 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
3588 regno = FIRST_FLOAT_REG;
3589 else
3590 /* Most things go in %eax. */
3591 regno = AX_REG;
3593 /* Override FP return register with %xmm0 for local functions when
3594 SSE math is enabled or for functions with sseregparm attribute. */
3595 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
3597 int sse_level = ix86_function_sseregparm (fntype, fn, false);
3598 if (sse_level == -1)
3600 error ("calling %qD with SSE calling convention without "
3601 "SSE/SSE2 enabled", fn);
3602 sorry ("this is a GCC bug that can be worked around by adding "
3603 "attribute used to function called");
3605 else if ((sse_level >= 1 && mode == SFmode)
3606 || (sse_level == 2 && mode == DFmode))
3607 regno = FIRST_SSE_REG;
3610 /* OImode shouldn't be used directly. */
3611 gcc_assert (mode != OImode);
3613 return gen_rtx_REG (orig_mode, regno);
3616 static rtx
3617 function_value_64 (machine_mode orig_mode, machine_mode mode,
3618 const_tree valtype)
3620 rtx ret;
3622 /* Handle libcalls, which don't provide a type node. */
3623 if (valtype == NULL)
3625 unsigned int regno;
3627 switch (mode)
3629 case E_SFmode:
3630 case E_SCmode:
3631 case E_DFmode:
3632 case E_DCmode:
3633 case E_TFmode:
3634 case E_SDmode:
3635 case E_DDmode:
3636 case E_TDmode:
3637 regno = FIRST_SSE_REG;
3638 break;
3639 case E_XFmode:
3640 case E_XCmode:
3641 regno = FIRST_FLOAT_REG;
3642 break;
3643 case E_TCmode:
3644 return NULL;
3645 default:
3646 regno = AX_REG;
3649 return gen_rtx_REG (mode, regno);
3651 else if (POINTER_TYPE_P (valtype))
3653 /* Pointers are always returned in word_mode. */
3654 mode = word_mode;
3657 ret = construct_container (mode, orig_mode, valtype, 1,
3658 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
3659 x86_64_int_return_registers, 0);
3661 /* For zero-sized structures, construct_container returns NULL, but we
3662 need to keep the rest of the compiler happy by returning a meaningful value. */
3663 if (!ret)
3664 ret = gen_rtx_REG (orig_mode, AX_REG);
3666 return ret;
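/* An illustrative example (the struct is invented): returning
   struct s { double x; int i; } from a SysV x86-64 function yields a
   PARALLEL with the double part in %xmm0 and the integer part in %rax,
   while returning a pointer takes the POINTER_TYPE_P branch above and
   comes back as a word_mode register, i.e. %rax.  */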
3669 static rtx
3670 function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
3671 const_tree fntype, const_tree fn, const_tree valtype)
3673 unsigned int regno;
3675 /* Floating point return values in %st(0)
3676 (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
3677 if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
3678 && (GET_MODE_SIZE (mode) > 8
3679 || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
3681 regno = FIRST_FLOAT_REG;
3682 return gen_rtx_REG (orig_mode, regno);
3684 else
3685 return function_value_32 (orig_mode, mode, fntype, fn);
3688 static rtx
3689 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
3690 const_tree valtype)
3692 unsigned int regno = AX_REG;
3694 if (TARGET_SSE)
3696 switch (GET_MODE_SIZE (mode))
3698 case 16:
3699 if (valtype != NULL_TREE
3700 && !VECTOR_INTEGER_TYPE_P (valtype)
3702 && !INTEGRAL_TYPE_P (valtype)
3703 && !VECTOR_FLOAT_TYPE_P (valtype))
3704 break;
3705 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
3706 && !COMPLEX_MODE_P (mode))
3707 regno = FIRST_SSE_REG;
3708 break;
3709 case 8:
3710 case 4:
3711 if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
3712 break;
3713 if (mode == SFmode || mode == DFmode)
3714 regno = FIRST_SSE_REG;
3715 break;
3716 default:
3717 break;
3720 return gen_rtx_REG (orig_mode, regno);
3723 static rtx
3724 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
3725 machine_mode orig_mode, machine_mode mode)
3727 const_tree fn, fntype;
3729 fn = NULL_TREE;
3730 if (fntype_or_decl && DECL_P (fntype_or_decl))
3731 fn = fntype_or_decl;
3732 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3734 if (ix86_function_type_abi (fntype) == MS_ABI)
3736 if (TARGET_64BIT)
3737 return function_value_ms_64 (orig_mode, mode, valtype);
3738 else
3739 return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
3741 else if (TARGET_64BIT)
3742 return function_value_64 (orig_mode, mode, valtype);
3743 else
3744 return function_value_32 (orig_mode, mode, fntype, fn);
3747 static rtx
3748 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
3750 machine_mode mode, orig_mode;
3752 orig_mode = TYPE_MODE (valtype);
3753 mode = type_natural_mode (valtype, NULL, true);
3754 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
3757 /* Pointer function arguments and return values are promoted to
3758 word_mode for normal functions. */
3760 static machine_mode
3761 ix86_promote_function_mode (const_tree type, machine_mode mode,
3762 int *punsignedp, const_tree fntype,
3763 int for_return)
3765 if (cfun->machine->func_type == TYPE_NORMAL
3766 && type != NULL_TREE
3767 && POINTER_TYPE_P (type))
3769 *punsignedp = POINTERS_EXTEND_UNSIGNED;
3770 return word_mode;
3772 return default_promote_function_mode (type, mode, punsignedp, fntype,
3773 for_return);
3776 /* Return true if a structure, union or array with MODE containing FIELD
3777 should be accessed using BLKmode. */
3779 static bool
3780 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
3782 /* Union with XFmode must be in BLKmode. */
3783 return (mode == XFmode
3784 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
3785 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
3789 ix86_libcall_value (machine_mode mode)
3791 return ix86_function_value_1 (NULL, NULL, mode, mode);
3794 /* Return true iff type is returned in memory. */
3796 static bool
3797 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
3799 #ifdef SUBTARGET_RETURN_IN_MEMORY
3800 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
3801 #else
3802 const machine_mode mode = type_natural_mode (type, NULL, true);
3803 HOST_WIDE_INT size;
3805 if (TARGET_64BIT)
3807 if (ix86_function_type_abi (fntype) == MS_ABI)
3809 size = int_size_in_bytes (type);
3811 /* __m128 is returned in xmm0. */
3812 if ((!type || VECTOR_INTEGER_TYPE_P (type)
3813 || INTEGRAL_TYPE_P (type)
3814 || VECTOR_FLOAT_TYPE_P (type))
3815 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
3816 && !COMPLEX_MODE_P (mode)
3817 && (GET_MODE_SIZE (mode) == 16 || size == 16))
3818 return false;
3820 /* Otherwise, the size must be exactly in [1248]. */
3821 return size != 1 && size != 2 && size != 4 && size != 8;
3823 else
3825 int needed_intregs, needed_sseregs;
3827 return examine_argument (mode, type, 1,
3828 &needed_intregs, &needed_sseregs);
3831 else
3833 size = int_size_in_bytes (type);
3835 /* Intel MCU psABI returns scalars and aggregates no larger than 8
3836 bytes in registers. */
3837 if (TARGET_IAMCU)
3838 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
3840 if (mode == BLKmode)
3841 return true;
3843 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3844 return false;
3846 if (VECTOR_MODE_P (mode) || mode == TImode)
3848 /* User-created vectors small enough to fit in EAX. */
3849 if (size < 8)
3850 return false;
3852 /* Unless the ABI prescribes otherwise,
3853 MMX/3dNow values are returned in MM0 if available. */
3855 if (size == 8)
3856 return TARGET_VECT8_RETURNS || !TARGET_MMX;
3858 /* SSE values are returned in XMM0 if available. */
3859 if (size == 16)
3860 return !TARGET_SSE;
3862 /* AVX values are returned in YMM0 if available. */
3863 if (size == 32)
3864 return !TARGET_AVX;
3866 /* AVX512F values are returned in ZMM0 if available. */
3867 if (size == 64)
3868 return !TARGET_AVX512F;
3871 if (mode == XFmode)
3872 return false;
3874 if (size > 12)
3875 return true;
3877 /* OImode shouldn't be used directly. */
3878 gcc_assert (mode != OImode);
3880 return false;
3882 #endif
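/* An illustrative example (assuming a non-IAMCU target): on i386 a
   12-byte struct has BLKmode and is therefore returned in memory, a
   __m128 value is returned in %xmm0 as long as SSE is enabled, and an
   8-byte vector comes back in %mm0 only when MMX is available and
   -mvect8-ret-in-mem is not in effect; on x86-64 the SysV path simply
   reuses examine_argument to decide.  */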
3886 /* Create the va_list data type. */
3888 static tree
3889 ix86_build_builtin_va_list_64 (void)
3891 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3893 record = lang_hooks.types.make_type (RECORD_TYPE);
3894 type_decl = build_decl (BUILTINS_LOCATION,
3895 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3897 f_gpr = build_decl (BUILTINS_LOCATION,
3898 FIELD_DECL, get_identifier ("gp_offset"),
3899 unsigned_type_node);
3900 f_fpr = build_decl (BUILTINS_LOCATION,
3901 FIELD_DECL, get_identifier ("fp_offset"),
3902 unsigned_type_node);
3903 f_ovf = build_decl (BUILTINS_LOCATION,
3904 FIELD_DECL, get_identifier ("overflow_arg_area"),
3905 ptr_type_node);
3906 f_sav = build_decl (BUILTINS_LOCATION,
3907 FIELD_DECL, get_identifier ("reg_save_area"),
3908 ptr_type_node);
3910 va_list_gpr_counter_field = f_gpr;
3911 va_list_fpr_counter_field = f_fpr;
3913 DECL_FIELD_CONTEXT (f_gpr) = record;
3914 DECL_FIELD_CONTEXT (f_fpr) = record;
3915 DECL_FIELD_CONTEXT (f_ovf) = record;
3916 DECL_FIELD_CONTEXT (f_sav) = record;
3918 TYPE_STUB_DECL (record) = type_decl;
3919 TYPE_NAME (record) = type_decl;
3920 TYPE_FIELDS (record) = f_gpr;
3921 DECL_CHAIN (f_gpr) = f_fpr;
3922 DECL_CHAIN (f_fpr) = f_ovf;
3923 DECL_CHAIN (f_ovf) = f_sav;
3925 layout_type (record);
3927 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
3928 NULL_TREE, TYPE_ATTRIBUTES (record));
3930 /* The correct type is an array type of one element. */
3931 return build_array_type (record, build_index_type (size_zero_node));
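/* An illustrative sketch of the record built above; it corresponds to
   the familiar SysV x86-64 va_list:

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag va_list[1];

   gp_offset and fp_offset index into the register save area set up by
   setup_incoming_varargs_64 below, while overflow_arg_area walks the
   stack-passed arguments.  */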
3934 /* Set up the builtin va_list data type and, for 64-bit, the additional
3935 calling-convention-specific va_list data types. */
3937 static tree
3938 ix86_build_builtin_va_list (void)
3940 if (TARGET_64BIT)
3942 /* Initialize ABI specific va_list builtin types.
3944 In lto1, we can encounter two va_list types:
3945 - one as a result of the type-merge across TUs, and
3946 - the one constructed here.
3947 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
3948 a type identity check in canonical_va_list_type based on
3949 TYPE_MAIN_VARIANT (which we used to have) will not work.
3950 Instead, we tag each va_list_type_node with its unique attribute, and
3951 look for the attribute in the type identity check in
3952 canonical_va_list_type.
3954 Tagging sysv_va_list_type_node directly with the attribute is
3955 problematic since it's an array of one record, which will degrade into a
3956 pointer to record when used as parameter (see build_va_arg comments for
3957 an example), dropping the attribute in the process. So we tag the
3958 record instead. */
3960 /* For SYSV_ABI we use an array of one record. */
3961 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
3963 /* For MS_ABI we use plain pointer to argument area. */
3964 tree char_ptr_type = build_pointer_type (char_type_node);
3965 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
3966 TYPE_ATTRIBUTES (char_ptr_type));
3967 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
3969 return ((ix86_abi == MS_ABI)
3970 ? ms_va_list_type_node
3971 : sysv_va_list_type_node);
3973 else
3975 /* For i386 we use plain pointer to argument area. */
3976 return build_pointer_type (char_type_node);
3980 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3982 static void
3983 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
3985 rtx save_area, mem;
3986 alias_set_type set;
3987 int i, max;
3989 /* GPR size of varargs save area. */
3990 if (cfun->va_list_gpr_size)
3991 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
3992 else
3993 ix86_varargs_gpr_size = 0;
3995 /* FPR size of varargs save area. We don't need it if we don't pass
3996 anything in SSE registers. */
3997 if (TARGET_SSE && cfun->va_list_fpr_size)
3998 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
3999 else
4000 ix86_varargs_fpr_size = 0;
4002 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
4003 return;
4005 save_area = frame_pointer_rtx;
4006 set = get_varargs_alias_set ();
4008 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4009 if (max > X86_64_REGPARM_MAX)
4010 max = X86_64_REGPARM_MAX;
4012 for (i = cum->regno; i < max; i++)
4014 mem = gen_rtx_MEM (word_mode,
4015 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
4016 MEM_NOTRAP_P (mem) = 1;
4017 set_mem_alias_set (mem, set);
4018 emit_move_insn (mem,
4019 gen_rtx_REG (word_mode,
4020 x86_64_int_parameter_registers[i]));
4023 if (ix86_varargs_fpr_size)
4025 machine_mode smode;
4026 rtx_code_label *label;
4027 rtx test;
4029 /* Now emit code to save SSE registers.  The AX parameter contains the number
4030 of SSE parameter registers used to call this function, though all we
4031 actually check here is the zero/non-zero status. */
4033 label = gen_label_rtx ();
4034 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
4035 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
4036 label));
4038 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4039 we used movdqa (i.e. TImode) instead? Perhaps even better would
4040 be if we could determine the real mode of the data, via a hook
4041 into pass_stdarg. Ignore all that for now. */
4042 smode = V4SFmode;
4043 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
4044 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
4046 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
4047 if (max > X86_64_SSE_REGPARM_MAX)
4048 max = X86_64_SSE_REGPARM_MAX;
4050 for (i = cum->sse_regno; i < max; ++i)
4052 mem = plus_constant (Pmode, save_area,
4053 i * 16 + ix86_varargs_gpr_size);
4054 mem = gen_rtx_MEM (smode, mem);
4055 MEM_NOTRAP_P (mem) = 1;
4056 set_mem_alias_set (mem, set);
4057 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
4059 emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
4062 emit_label (label);
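/* Illustrative layout of the register save area filled in above,
   assuming the full complement of X86_64_REGPARM_MAX (6) integer and
   X86_64_SSE_REGPARM_MAX (8) SSE argument registers is dumped:

     offset   0 ..  47 : rdi rsi rdx rcx r8 r9   (6 * 8 bytes)
     offset  48 .. 175 : xmm0 .. xmm7            (8 * 16 bytes)

   The SSE half is skipped at run time when %al is zero, which is what
   the conditional branch emitted above tests.  */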
4066 static void
4067 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4069 alias_set_type set = get_varargs_alias_set ();
4070 int i;
4072 /* Reset to zero, as there might be a sysv va_arg used
4073 before. */
4074 ix86_varargs_gpr_size = 0;
4075 ix86_varargs_fpr_size = 0;
4077 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4079 rtx reg, mem;
4081 mem = gen_rtx_MEM (Pmode,
4082 plus_constant (Pmode, virtual_incoming_args_rtx,
4083 i * UNITS_PER_WORD));
4084 MEM_NOTRAP_P (mem) = 1;
4085 set_mem_alias_set (mem, set);
4087 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4088 emit_move_insn (mem, reg);
4092 static void
4093 ix86_setup_incoming_varargs (cumulative_args_t cum_v,
4094 const function_arg_info &arg,
4095 int *, int no_rtl)
4097 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4098 CUMULATIVE_ARGS next_cum;
4099 tree fntype;
4101 /* This argument doesn't appear to be used anymore. Which is good,
4102 because the old code here didn't suppress rtl generation. */
4103 gcc_assert (!no_rtl);
4105 if (!TARGET_64BIT)
4106 return;
4108 fntype = TREE_TYPE (current_function_decl);
4110 /* For varargs, we do not want to skip the dummy va_dcl argument.
4111 For stdargs, we do want to skip the last named argument. */
4112 next_cum = *cum;
4113 if (stdarg_p (fntype))
4114 ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4116 if (cum->call_abi == MS_ABI)
4117 setup_incoming_varargs_ms_64 (&next_cum);
4118 else
4119 setup_incoming_varargs_64 (&next_cum);
4122 /* Checks if TYPE is of kind va_list char *. */
4124 static bool
4125 is_va_list_char_pointer (tree type)
4127 tree canonic;
4129 /* For 32-bit it is always true. */
4130 if (!TARGET_64BIT)
4131 return true;
4132 canonic = ix86_canonical_va_list_type (type);
4133 return (canonic == ms_va_list_type_node
4134 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4137 /* Implement va_start. */
4139 static void
4140 ix86_va_start (tree valist, rtx nextarg)
4142 HOST_WIDE_INT words, n_gpr, n_fpr;
4143 tree f_gpr, f_fpr, f_ovf, f_sav;
4144 tree gpr, fpr, ovf, sav, t;
4145 tree type;
4146 rtx ovf_rtx;
4148 if (flag_split_stack
4149 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4151 unsigned int scratch_regno;
4153 /* When we are splitting the stack, we can't refer to the stack
4154 arguments using internal_arg_pointer, because they may be on
4155 the old stack. The split stack prologue will arrange to
4156 leave a pointer to the old stack arguments in a scratch
4157 register, which we here copy to a pseudo-register. The split
4158 stack prologue can't set the pseudo-register directly because
4159 it (the prologue) runs before any registers have been saved. */
4161 scratch_regno = split_stack_prologue_scratch_regno ();
4162 if (scratch_regno != INVALID_REGNUM)
4164 rtx reg;
4165 rtx_insn *seq;
4167 reg = gen_reg_rtx (Pmode);
4168 cfun->machine->split_stack_varargs_pointer = reg;
4170 start_sequence ();
4171 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
4172 seq = get_insns ();
4173 end_sequence ();
4175 push_topmost_sequence ();
4176 emit_insn_after (seq, entry_of_function ());
4177 pop_topmost_sequence ();
4181 /* Only 64bit target needs something special. */
4182 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4184 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4185 std_expand_builtin_va_start (valist, nextarg);
4186 else
4188 rtx va_r, next;
4190 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
4191 next = expand_binop (ptr_mode, add_optab,
4192 cfun->machine->split_stack_varargs_pointer,
4193 crtl->args.arg_offset_rtx,
4194 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4195 convert_move (va_r, next, 0);
4197 return;
4200 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4201 f_fpr = DECL_CHAIN (f_gpr);
4202 f_ovf = DECL_CHAIN (f_fpr);
4203 f_sav = DECL_CHAIN (f_ovf);
4205 valist = build_simple_mem_ref (valist);
4206 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
4207 /* The following should be folded into the MEM_REF offset. */
4208 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
4209 f_gpr, NULL_TREE);
4210 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
4211 f_fpr, NULL_TREE);
4212 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
4213 f_ovf, NULL_TREE);
4214 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
4215 f_sav, NULL_TREE);
4217 /* Count number of gp and fp argument registers used. */
4218 words = crtl->args.info.words;
4219 n_gpr = crtl->args.info.regno;
4220 n_fpr = crtl->args.info.sse_regno;
4222 if (cfun->va_list_gpr_size)
4224 type = TREE_TYPE (gpr);
4225 t = build2 (MODIFY_EXPR, type,
4226 gpr, build_int_cst (type, n_gpr * 8));
4227 TREE_SIDE_EFFECTS (t) = 1;
4228 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4231 if (TARGET_SSE && cfun->va_list_fpr_size)
4233 type = TREE_TYPE (fpr);
4234 t = build2 (MODIFY_EXPR, type, fpr,
4235 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
4236 TREE_SIDE_EFFECTS (t) = 1;
4237 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4240 /* Find the overflow area. */
4241 type = TREE_TYPE (ovf);
4242 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4243 ovf_rtx = crtl->args.internal_arg_pointer;
4244 else
4245 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
4246 t = make_tree (type, ovf_rtx);
4247 if (words != 0)
4248 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
4250 t = build2 (MODIFY_EXPR, type, ovf, t);
4251 TREE_SIDE_EFFECTS (t) = 1;
4252 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4254 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
4256 /* Find the register save area.
4257 The function prologue saves it right above the stack frame. */
4258 type = TREE_TYPE (sav);
4259 t = make_tree (type, frame_pointer_rtx);
4260 if (!ix86_varargs_gpr_size)
4261 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
4263 t = build2 (MODIFY_EXPR, type, sav, t);
4264 TREE_SIDE_EFFECTS (t) = 1;
4265 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
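/* Illustrative example (assumed signature, not taken from this file):
   for

     void f (int a, double b, ...)

   one integer and one SSE register are consumed by the named
   arguments, so the code above initializes the va_list roughly as

     gp_offset         = 1 * 8       =  8
     fp_offset         = 48 + 1 * 16 = 64
     overflow_arg_area = incoming stack argument pointer
     reg_save_area     = base of the register save area  */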
4269 /* Implement va_arg. */
4271 static tree
4272 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4273 gimple_seq *post_p)
4275 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4276 tree f_gpr, f_fpr, f_ovf, f_sav;
4277 tree gpr, fpr, ovf, sav, t;
4278 int size, rsize;
4279 tree lab_false, lab_over = NULL_TREE;
4280 tree addr, t2;
4281 rtx container;
4282 int indirect_p = 0;
4283 tree ptrtype;
4284 machine_mode nat_mode;
4285 unsigned int arg_boundary;
4286 unsigned int type_align;
4288 /* Only 64bit target needs something special. */
4289 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4290 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4292 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4293 f_fpr = DECL_CHAIN (f_gpr);
4294 f_ovf = DECL_CHAIN (f_fpr);
4295 f_sav = DECL_CHAIN (f_ovf);
4297 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
4298 valist, f_gpr, NULL_TREE);
4300 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4301 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4302 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4304 indirect_p = pass_va_arg_by_reference (type);
4305 if (indirect_p)
4306 type = build_pointer_type (type);
4307 size = arg_int_size_in_bytes (type);
4308 rsize = CEIL (size, UNITS_PER_WORD);
4310 nat_mode = type_natural_mode (type, NULL, false);
4311 switch (nat_mode)
4313 case E_V8SFmode:
4314 case E_V8SImode:
4315 case E_V32QImode:
4316 case E_V16HImode:
4317 case E_V4DFmode:
4318 case E_V4DImode:
4319 case E_V16SFmode:
4320 case E_V16SImode:
4321 case E_V64QImode:
4322 case E_V32HImode:
4323 case E_V8DFmode:
4324 case E_V8DImode:
4325 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
4326 if (!TARGET_64BIT_MS_ABI)
4328 container = NULL;
4329 break;
4331 /* FALLTHRU */
4333 default:
4334 container = construct_container (nat_mode, TYPE_MODE (type),
4335 type, 0, X86_64_REGPARM_MAX,
4336 X86_64_SSE_REGPARM_MAX, intreg,
4338 break;
4341 /* Pull the value out of the saved registers. */
4343 addr = create_tmp_var (ptr_type_node, "addr");
4344 type_align = TYPE_ALIGN (type);
4346 if (container)
4348 int needed_intregs, needed_sseregs;
4349 bool need_temp;
4350 tree int_addr, sse_addr;
4352 lab_false = create_artificial_label (UNKNOWN_LOCATION);
4353 lab_over = create_artificial_label (UNKNOWN_LOCATION);
4355 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4357 need_temp = (!REG_P (container)
4358 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4359 || TYPE_ALIGN (type) > 128));
4361 /* In case we are passing a structure, verify that it is a consecutive block
4362 in the register save area.  If not, we need to do moves. */
4363 if (!need_temp && !REG_P (container))
4365 /* Verify that all registers are strictly consecutive */
4366 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4368 int i;
4370 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4372 rtx slot = XVECEXP (container, 0, i);
4373 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4374 || INTVAL (XEXP (slot, 1)) != i * 16)
4375 need_temp = true;
4378 else
4380 int i;
4382 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4384 rtx slot = XVECEXP (container, 0, i);
4385 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4386 || INTVAL (XEXP (slot, 1)) != i * 8)
4387 need_temp = true;
4391 if (!need_temp)
4393 int_addr = addr;
4394 sse_addr = addr;
4396 else
4398 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4399 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4402 /* First ensure that we fit completely in registers. */
4403 if (needed_intregs)
4405 t = build_int_cst (TREE_TYPE (gpr),
4406 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
4407 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4408 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4409 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4410 gimplify_and_add (t, pre_p);
4412 if (needed_sseregs)
4414 t = build_int_cst (TREE_TYPE (fpr),
4415 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4416 + X86_64_REGPARM_MAX * 8);
4417 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4418 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4419 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4420 gimplify_and_add (t, pre_p);
4423 /* Compute index to start of area used for integer regs. */
4424 if (needed_intregs)
4426 /* int_addr = gpr + sav; */
4427 t = fold_build_pointer_plus (sav, gpr);
4428 gimplify_assign (int_addr, t, pre_p);
4430 if (needed_sseregs)
4432 /* sse_addr = fpr + sav; */
4433 t = fold_build_pointer_plus (sav, fpr);
4434 gimplify_assign (sse_addr, t, pre_p);
4436 if (need_temp)
4438 int i, prev_size = 0;
4439 tree temp = create_tmp_var (type, "va_arg_tmp");
4441 /* addr = &temp; */
4442 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4443 gimplify_assign (addr, t, pre_p);
4445 for (i = 0; i < XVECLEN (container, 0); i++)
4447 rtx slot = XVECEXP (container, 0, i);
4448 rtx reg = XEXP (slot, 0);
4449 machine_mode mode = GET_MODE (reg);
4450 tree piece_type;
4451 tree addr_type;
4452 tree daddr_type;
4453 tree src_addr, src;
4454 int src_offset;
4455 tree dest_addr, dest;
4456 int cur_size = GET_MODE_SIZE (mode);
4458 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
4459 prev_size = INTVAL (XEXP (slot, 1));
4460 if (prev_size + cur_size > size)
4462 cur_size = size - prev_size;
4463 unsigned int nbits = cur_size * BITS_PER_UNIT;
4464 if (!int_mode_for_size (nbits, 1).exists (&mode))
4465 mode = QImode;
4467 piece_type = lang_hooks.types.type_for_mode (mode, 1);
4468 if (mode == GET_MODE (reg))
4469 addr_type = build_pointer_type (piece_type);
4470 else
4471 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4472 true);
4473 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4474 true);
4476 if (SSE_REGNO_P (REGNO (reg)))
4478 src_addr = sse_addr;
4479 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4481 else
4483 src_addr = int_addr;
4484 src_offset = REGNO (reg) * 8;
4486 src_addr = fold_convert (addr_type, src_addr);
4487 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
4489 dest_addr = fold_convert (daddr_type, addr);
4490 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
4491 if (cur_size == GET_MODE_SIZE (mode))
4493 src = build_va_arg_indirect_ref (src_addr);
4494 dest = build_va_arg_indirect_ref (dest_addr);
4496 gimplify_assign (dest, src, pre_p);
4498 else
4500 tree copy
4501 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
4502 3, dest_addr, src_addr,
4503 size_int (cur_size));
4504 gimplify_and_add (copy, pre_p);
4506 prev_size += cur_size;
4510 if (needed_intregs)
4512 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4513 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4514 gimplify_assign (gpr, t, pre_p);
4515 /* The GPR save area guarantees only 8-byte alignment. */
4516 if (!need_temp)
4517 type_align = MIN (type_align, 64);
4520 if (needed_sseregs)
4522 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4523 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4524 gimplify_assign (unshare_expr (fpr), t, pre_p);
4527 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
4529 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
4532 /* ... otherwise out of the overflow area. */
4534 /* When we align a parameter on the stack for the caller, if the parameter
4535 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
4536 aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We match the callee
4537 here with the caller. */
4538 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
4539 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
4540 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
4542 /* Care for on-stack alignment if needed. */
4543 if (arg_boundary <= 64 || size == 0)
4544 t = ovf;
4545 else
4547 HOST_WIDE_INT align = arg_boundary / 8;
4548 t = fold_build_pointer_plus_hwi (ovf, align - 1);
4549 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4550 build_int_cst (TREE_TYPE (t), -align));
4553 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4554 gimplify_assign (addr, t, pre_p);
4556 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
4557 gimplify_assign (unshare_expr (ovf), t, pre_p);
4559 if (container)
4560 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
4562 type = build_aligned_type (type, type_align);
4563 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
4564 addr = fold_convert (ptrtype, addr);
4566 if (indirect_p)
4567 addr = build_va_arg_indirect_ref (addr);
4568 return build_va_arg_indirect_ref (addr);
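/* Hedged pseudo-gimple sketch of what the code above produces for
   va_arg (ap, int) on x86-64 SysV (names are for exposition only):

     if (ap.gp_offset >= 48) goto overflow;
     addr = ap.reg_save_area + ap.gp_offset;
     ap.gp_offset += 8;
     goto done;
   overflow:
     addr = ap.overflow_arg_area;
     ap.overflow_arg_area = addr + 8;
   done:
     result = *(int *) addr;

   Types with alignment above 64 bits additionally round
   overflow_arg_area up before the load, as handled above.  */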
4571 /* Return true if OPNUM's MEM should be matched
4572 in movabs* patterns. */
4574 bool
4575 ix86_check_movabs (rtx insn, int opnum)
4577 rtx set, mem;
4579 set = PATTERN (insn);
4580 if (GET_CODE (set) == PARALLEL)
4581 set = XVECEXP (set, 0, 0);
4582 gcc_assert (GET_CODE (set) == SET);
4583 mem = XEXP (set, opnum);
4584 while (SUBREG_P (mem))
4585 mem = SUBREG_REG (mem);
4586 gcc_assert (MEM_P (mem));
4587 return volatile_ok || !MEM_VOLATILE_P (mem);
4590 /* Return false if INSN contains a MEM with a non-default address space. */
4591 bool
4592 ix86_check_no_addr_space (rtx insn)
4594 subrtx_var_iterator::array_type array;
4595 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
4597 rtx x = *iter;
4598 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
4599 return false;
4601 return true;
4604 /* Initialize the table of extra 80387 mathematical constants. */
4606 static void
4607 init_ext_80387_constants (void)
4609 static const char * cst[5] =
4611 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4612 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4613 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4614 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4615 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4617 int i;
4619 for (i = 0; i < 5; i++)
4621 real_from_string (&ext_80387_constants_table[i], cst[i]);
4622 /* Ensure each constant is rounded to XFmode precision. */
4623 real_convert (&ext_80387_constants_table[i],
4624 XFmode, &ext_80387_constants_table[i]);
4627 ext_80387_constants_init = 1;
4630 /* Return non-zero if the constant is something that
4631 can be loaded with a special instruction. */
4634 standard_80387_constant_p (rtx x)
4636 machine_mode mode = GET_MODE (x);
4638 const REAL_VALUE_TYPE *r;
4640 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
4641 return -1;
4643 if (x == CONST0_RTX (mode))
4644 return 1;
4645 if (x == CONST1_RTX (mode))
4646 return 2;
4648 r = CONST_DOUBLE_REAL_VALUE (x);
4650 /* For XFmode constants, try to find a special 80387 instruction when
4651 optimizing for size or on those CPUs that benefit from them. */
4652 if (mode == XFmode
4653 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
4655 int i;
4657 if (! ext_80387_constants_init)
4658 init_ext_80387_constants ();
4660 for (i = 0; i < 5; i++)
4661 if (real_identical (r, &ext_80387_constants_table[i]))
4662 return i + 3;
4665 /* Load of the constant -0.0 or -1.0 will be split as
4666 fldz;fchs or fld1;fchs sequence. */
4667 if (real_isnegzero (r))
4668 return 8;
4669 if (real_identical (r, &dconstm1))
4670 return 9;
4672 return 0;
4675 /* Return the opcode of the special instruction to be used to load
4676 the constant X. */
4678 const char *
4679 standard_80387_constant_opcode (rtx x)
4681 switch (standard_80387_constant_p (x))
4683 case 1:
4684 return "fldz";
4685 case 2:
4686 return "fld1";
4687 case 3:
4688 return "fldlg2";
4689 case 4:
4690 return "fldln2";
4691 case 5:
4692 return "fldl2e";
4693 case 6:
4694 return "fldl2t";
4695 case 7:
4696 return "fldpi";
4697 case 8:
4698 case 9:
4699 return "#";
4700 default:
4701 gcc_unreachable ();
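/* Reference table for the two routines above, derived directly from
   the code (shown here only as a summary):

     return of standard_80387_constant_p    constant     opcode
       1                                    +0.0         fldz
       2                                    +1.0         fld1
       3                                    log10(2)     fldlg2
       4                                    ln(2)        fldln2
       5                                    log2(e)      fldl2e
       6                                    log2(10)     fldl2t
       7                                    pi           fldpi
       8, 9                                 -0.0, -1.0   split: fldz/fld1 + fchs  */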
4705 /* Return the CONST_DOUBLE representing the 80387 constant that is
4706 loaded by the specified special instruction. The argument IDX
4707 matches the return value from standard_80387_constant_p. */
4710 standard_80387_constant_rtx (int idx)
4712 int i;
4714 if (! ext_80387_constants_init)
4715 init_ext_80387_constants ();
4717 switch (idx)
4719 case 3:
4720 case 4:
4721 case 5:
4722 case 6:
4723 case 7:
4724 i = idx - 3;
4725 break;
4727 default:
4728 gcc_unreachable ();
4731 return const_double_from_real_value (ext_80387_constants_table[i],
4732 XFmode);
4735 /* Return 1 if X is all bits 0 and 2 if X is all bits 1
4736 in a supported SSE/AVX vector mode. */
4739 standard_sse_constant_p (rtx x, machine_mode pred_mode)
4741 machine_mode mode;
4743 if (!TARGET_SSE)
4744 return 0;
4746 mode = GET_MODE (x);
4748 if (x == const0_rtx || const0_operand (x, mode))
4749 return 1;
4751 if (x == constm1_rtx || vector_all_ones_operand (x, mode))
4753 /* VOIDmode integer constant, get mode from the predicate. */
4754 if (mode == VOIDmode)
4755 mode = pred_mode;
4757 switch (GET_MODE_SIZE (mode))
4759 case 64:
4760 if (TARGET_AVX512F)
4761 return 2;
4762 break;
4763 case 32:
4764 if (TARGET_AVX2)
4765 return 2;
4766 break;
4767 case 16:
4768 if (TARGET_SSE2)
4769 return 2;
4770 break;
4771 case 0:
4772 /* VOIDmode */
4773 gcc_unreachable ();
4774 default:
4775 break;
4779 return 0;
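/* Illustrative classifications by the function above (assuming
   TARGET_SSE is enabled):

     CONST0_RTX (V4SImode)                      -> 1  (all zeros)
     CONSTM1_RTX (V8SImode) with TARGET_AVX2    -> 2  (all ones, 32 bytes)
     CONSTM1_RTX (V8SImode) without TARGET_AVX2 -> 0  (no special load)  */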
4782 /* Return the opcode of the special instruction to be used to load
4783 the constant operands[1] into operands[0]. */
4785 const char *
4786 standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
4788 machine_mode mode;
4789 rtx x = operands[1];
4791 gcc_assert (TARGET_SSE);
4793 mode = GET_MODE (x);
4795 if (x == const0_rtx || const0_operand (x, mode))
4797 switch (get_attr_mode (insn))
4799 case MODE_TI:
4800 if (!EXT_REX_SSE_REG_P (operands[0]))
4801 return "%vpxor\t%0, %d0";
4802 /* FALLTHRU */
4803 case MODE_XI:
4804 case MODE_OI:
4805 if (EXT_REX_SSE_REG_P (operands[0]))
4806 return (TARGET_AVX512VL
4807 ? "vpxord\t%x0, %x0, %x0"
4808 : "vpxord\t%g0, %g0, %g0");
4809 return "vpxor\t%x0, %x0, %x0";
4811 case MODE_V2DF:
4812 if (!EXT_REX_SSE_REG_P (operands[0]))
4813 return "%vxorpd\t%0, %d0";
4814 /* FALLTHRU */
4815 case MODE_V8DF:
4816 case MODE_V4DF:
4817 if (!EXT_REX_SSE_REG_P (operands[0]))
4818 return "vxorpd\t%x0, %x0, %x0";
4819 else if (TARGET_AVX512DQ)
4820 return (TARGET_AVX512VL
4821 ? "vxorpd\t%x0, %x0, %x0"
4822 : "vxorpd\t%g0, %g0, %g0");
4823 else
4824 return (TARGET_AVX512VL
4825 ? "vpxorq\t%x0, %x0, %x0"
4826 : "vpxorq\t%g0, %g0, %g0");
4828 case MODE_V4SF:
4829 if (!EXT_REX_SSE_REG_P (operands[0]))
4830 return "%vxorps\t%0, %d0";
4831 /* FALLTHRU */
4832 case MODE_V16SF:
4833 case MODE_V8SF:
4834 if (!EXT_REX_SSE_REG_P (operands[0]))
4835 return "vxorps\t%x0, %x0, %x0";
4836 else if (TARGET_AVX512DQ)
4837 return (TARGET_AVX512VL
4838 ? "vxorps\t%x0, %x0, %x0"
4839 : "vxorps\t%g0, %g0, %g0");
4840 else
4841 return (TARGET_AVX512VL
4842 ? "vpxord\t%x0, %x0, %x0"
4843 : "vpxord\t%g0, %g0, %g0");
4845 default:
4846 gcc_unreachable ();
4849 else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
4851 enum attr_mode insn_mode = get_attr_mode (insn);
4853 switch (insn_mode)
4855 case MODE_XI:
4856 case MODE_V8DF:
4857 case MODE_V16SF:
4858 gcc_assert (TARGET_AVX512F);
4859 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
4861 case MODE_OI:
4862 case MODE_V4DF:
4863 case MODE_V8SF:
4864 gcc_assert (TARGET_AVX2);
4865 /* FALLTHRU */
4866 case MODE_TI:
4867 case MODE_V2DF:
4868 case MODE_V4SF:
4869 gcc_assert (TARGET_SSE2);
4870 if (!EXT_REX_SSE_REG_P (operands[0]))
4871 return (TARGET_AVX
4872 ? "vpcmpeqd\t%0, %0, %0"
4873 : "pcmpeqd\t%0, %0");
4874 else if (TARGET_AVX512VL)
4875 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
4876 else
4877 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
4879 default:
4880 gcc_unreachable ();
4884 gcc_unreachable ();
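/* Illustrative assembly produced by the routine above (AT&T syntax,
   register numbers chosen only for the example):

     all-zero  V4SF,  SSE only  ->  xorps      %xmm0, %xmm0
     all-zero  V4SF,  AVX       ->  vxorps     %xmm0, %xmm0, %xmm0
     all-ones  V2DF,  SSE2      ->  pcmpeqd    %xmm0, %xmm0
     all-ones  V16SF, AVX512F   ->  vpternlogd $0xFF, %zmm0, %zmm0, %zmm0  */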
4887 /* Returns true if INSN can be transformed from a memory load
4888 to a supported FP constant load. */
4890 bool
4891 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
4893 rtx src = find_constant_src (insn);
4895 gcc_assert (REG_P (dst));
4897 if (src == NULL
4898 || (SSE_REGNO_P (REGNO (dst))
4899 && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
4900 || (STACK_REGNO_P (REGNO (dst))
4901 && standard_80387_constant_p (src) < 1))
4902 return false;
4904 return true;
4907 /* Predicate for pre-reload splitters with associated instructions,
4908 which can match any time before the split1 pass (usually combine),
4909 then are unconditionally split in that pass and should not be
4910 matched again afterwards. */
4912 bool
4913 ix86_pre_reload_split (void)
4915 return (can_create_pseudo_p ()
4916 && !(cfun->curr_properties & PROP_rtl_split_insns));
4919 /* Return the opcode of the TYPE_SSEMOV instruction. To move from
4920 or to xmm16-xmm31/ymm16-ymm31 registers, we either require
4921 TARGET_AVX512VL or it must be a register-to-register move, which can
4922 be done with a zmm register move. */
4924 static const char *
4925 ix86_get_ssemov (rtx *operands, unsigned size,
4926 enum attr_mode insn_mode, machine_mode mode)
4928 char buf[128];
4929 bool misaligned_p = (misaligned_operand (operands[0], mode)
4930 || misaligned_operand (operands[1], mode));
4931 bool evex_reg_p = (size == 64
4932 || EXT_REX_SSE_REG_P (operands[0])
4933 || EXT_REX_SSE_REG_P (operands[1]));
4934 machine_mode scalar_mode;
4936 const char *opcode = NULL;
4937 enum
4939 opcode_int,
4940 opcode_float,
4941 opcode_double
4942 } type = opcode_int;
4944 switch (insn_mode)
4946 case MODE_V16SF:
4947 case MODE_V8SF:
4948 case MODE_V4SF:
4949 scalar_mode = E_SFmode;
4950 type = opcode_float;
4951 break;
4952 case MODE_V8DF:
4953 case MODE_V4DF:
4954 case MODE_V2DF:
4955 scalar_mode = E_DFmode;
4956 type = opcode_double;
4957 break;
4958 case MODE_XI:
4959 case MODE_OI:
4960 case MODE_TI:
4961 scalar_mode = GET_MODE_INNER (mode);
4962 break;
4963 default:
4964 gcc_unreachable ();
4967 /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
4968 we can only use a zmm register move without a memory operand. */
4969 if (evex_reg_p
4970 && !TARGET_AVX512VL
4971 && GET_MODE_SIZE (mode) < 64)
4973 /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
4974 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
4975 AVX512VL is disabled, LRA can still generate reg to
4976 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
4977 modes. */
4978 if (memory_operand (operands[0], mode)
4979 || memory_operand (operands[1], mode))
4980 gcc_unreachable ();
4981 size = 64;
4982 switch (type)
4984 case opcode_int:
4985 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
4986 break;
4987 case opcode_float:
4988 opcode = misaligned_p ? "vmovups" : "vmovaps";
4989 break;
4990 case opcode_double:
4991 opcode = misaligned_p ? "vmovupd" : "vmovapd";
4992 break;
4995 else if (SCALAR_FLOAT_MODE_P (scalar_mode))
4997 switch (scalar_mode)
4999 case E_SFmode:
5000 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5001 break;
5002 case E_DFmode:
5003 opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
5004 break;
5005 case E_TFmode:
5006 if (evex_reg_p)
5007 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5008 else
5009 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5010 break;
5011 default:
5012 gcc_unreachable ();
5015 else if (SCALAR_INT_MODE_P (scalar_mode))
5017 switch (scalar_mode)
5019 case E_QImode:
5020 if (evex_reg_p)
5021 opcode = (misaligned_p
5022 ? (TARGET_AVX512BW
5023 ? "vmovdqu8"
5024 : "vmovdqu64")
5025 : "vmovdqa64");
5026 else
5027 opcode = (misaligned_p
5028 ? (TARGET_AVX512BW
5029 ? "vmovdqu8"
5030 : "%vmovdqu")
5031 : "%vmovdqa");
5032 break;
5033 case E_HImode:
5034 if (evex_reg_p)
5035 opcode = (misaligned_p
5036 ? (TARGET_AVX512BW
5037 ? "vmovdqu16"
5038 : "vmovdqu64")
5039 : "vmovdqa64");
5040 else
5041 opcode = (misaligned_p
5042 ? (TARGET_AVX512BW
5043 ? "vmovdqu16"
5044 : "%vmovdqu")
5045 : "%vmovdqa");
5046 break;
5047 case E_SImode:
5048 if (evex_reg_p)
5049 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5050 else
5051 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5052 break;
5053 case E_DImode:
5054 case E_TImode:
5055 case E_OImode:
5056 if (evex_reg_p)
5057 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5058 else
5059 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5060 break;
5061 case E_XImode:
5062 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5063 break;
5064 default:
5065 gcc_unreachable ();
5068 else
5069 gcc_unreachable ();
5071 switch (size)
5073 case 64:
5074 snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
5075 opcode);
5076 break;
5077 case 32:
5078 snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
5079 opcode);
5080 break;
5081 case 16:
5082 snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
5083 opcode);
5084 break;
5085 default:
5086 gcc_unreachable ();
5088 output_asm_insn (buf, operands);
5089 return "";
5092 /* Return the template of the TYPE_SSEMOV instruction to move
5093 operands[1] into operands[0]. */
5095 const char *
5096 ix86_output_ssemov (rtx_insn *insn, rtx *operands)
5098 machine_mode mode = GET_MODE (operands[0]);
5099 if (get_attr_type (insn) != TYPE_SSEMOV
5100 || mode != GET_MODE (operands[1]))
5101 gcc_unreachable ();
5103 enum attr_mode insn_mode = get_attr_mode (insn);
5105 switch (insn_mode)
5107 case MODE_XI:
5108 case MODE_V8DF:
5109 case MODE_V16SF:
5110 return ix86_get_ssemov (operands, 64, insn_mode, mode);
5112 case MODE_OI:
5113 case MODE_V4DF:
5114 case MODE_V8SF:
5115 return ix86_get_ssemov (operands, 32, insn_mode, mode);
5117 case MODE_TI:
5118 case MODE_V2DF:
5119 case MODE_V4SF:
5120 return ix86_get_ssemov (operands, 16, insn_mode, mode);
5122 case MODE_DI:
5123 /* Handle broken assemblers that require movd instead of movq. */
5124 if (!HAVE_AS_IX86_INTERUNIT_MOVQ
5125 && (GENERAL_REG_P (operands[0])
5126 || GENERAL_REG_P (operands[1])))
5127 return "%vmovd\t{%1, %0|%0, %1}";
5128 else
5129 return "%vmovq\t{%1, %0|%0, %1}";
5131 case MODE_SI:
5132 return "%vmovd\t{%1, %0|%0, %1}";
5134 case MODE_DF:
5135 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5136 return "vmovsd\t{%d1, %0|%0, %d1}";
5137 else
5138 return "%vmovsd\t{%1, %0|%0, %1}";
5140 case MODE_SF:
5141 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5142 return "vmovss\t{%d1, %0|%0, %d1}";
5143 else
5144 return "%vmovss\t{%1, %0|%0, %1}";
5146 case MODE_V1DF:
5147 gcc_assert (!TARGET_AVX);
5148 return "movlpd\t{%1, %0|%0, %1}";
5150 case MODE_V2SF:
5151 if (TARGET_AVX && REG_P (operands[0]))
5152 return "vmovlps\t{%1, %d0|%d0, %1}";
5153 else
5154 return "%vmovlps\t{%1, %0|%0, %1}";
5156 default:
5157 gcc_unreachable ();
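/* Illustrative opcode selections by the two routines above (assumed
   operands, AT&T syntax):

     V4SF, aligned, xmm0..xmm15 operands   -> movaps / vmovaps
     V16SI, misaligned memory operand      -> vmovdqu32 on %zmm operands
     V4SI reg-to-reg in xmm16, no AVX512VL -> vmovdqa32 widened to the
                                              containing %zmm registers

   The last case relies on the rule documented above: without AVX512VL
   the extended SSE registers can only be moved as full zmm registers,
   and only when no memory operand is involved.  */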
5161 /* Returns true if OP contains a symbol reference */
5163 bool
5164 symbolic_reference_mentioned_p (rtx op)
5166 const char *fmt;
5167 int i;
5169 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5170 return true;
5172 fmt = GET_RTX_FORMAT (GET_CODE (op));
5173 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5175 if (fmt[i] == 'E')
5177 int j;
5179 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5180 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5181 return true;
5184 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5185 return true;
5188 return false;
5191 /* Return true if it is appropriate to emit `ret' instructions in the
5192 body of a function. Do this only if the epilogue is simple, needing a
5193 couple of insns. Prior to reloading, we can't tell how many registers
5194 must be saved, so return false then. Return false if there is no frame
5195 marker to de-allocate. */
5197 bool
5198 ix86_can_use_return_insn_p (void)
5200 if (ix86_function_naked (current_function_decl))
5201 return false;
5203 /* Don't use `ret' instruction in interrupt handler. */
5204 if (! reload_completed
5205 || frame_pointer_needed
5206 || cfun->machine->func_type != TYPE_NORMAL)
5207 return 0;
5209 /* Don't allow more than 32k pop, since that's all we can do
5210 with one instruction. */
5211 if (crtl->args.pops_args && crtl->args.size >= 32768)
5212 return 0;
5214 struct ix86_frame &frame = cfun->machine->frame;
5215 return (frame.stack_pointer_offset == UNITS_PER_WORD
5216 && (frame.nregs + frame.nsseregs) == 0);
5219 /* Return stack frame size. get_frame_size () returns used stack slots
5220 during compilation, which may be optimized out later. If stack frame
5221 is needed, stack_frame_required should be true. */
5223 static HOST_WIDE_INT
5224 ix86_get_frame_size (void)
5226 if (cfun->machine->stack_frame_required)
5227 return get_frame_size ();
5228 else
5229 return 0;
5232 /* Value should be nonzero if functions must have frame pointers.
5233 Zero means the frame pointer need not be set up (and parms may
5234 be accessed via the stack pointer) in functions that seem suitable. */
5236 static bool
5237 ix86_frame_pointer_required (void)
5239 /* If we accessed previous frames, then the generated code expects
5240 to be able to access the saved ebp value in our frame. */
5241 if (cfun->machine->accesses_prev_frame)
5242 return true;
5244 /* Several x86 OSes need a frame pointer for other reasons,
5245 usually pertaining to setjmp. */
5246 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5247 return true;
5249 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
5250 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
5251 return true;
5253 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
5254 allocation is 4GB. */
5255 if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
5256 return true;
5258 /* SSE saves require frame-pointer when stack is misaligned. */
5259 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
5260 return true;
5262 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
5263 turns off the frame pointer by default. Turn it back on now if
5264 we've not got a leaf function. */
5265 if (TARGET_OMIT_LEAF_FRAME_POINTER
5266 && (!crtl->is_leaf
5267 || ix86_current_function_calls_tls_descriptor))
5268 return true;
5270 /* Several versions of mcount for the x86 assume that there is a
5271 frame, so we cannot allow profiling without a frame pointer. */
5272 if (crtl->profile && !flag_fentry)
5273 return true;
5275 return false;
5278 /* Record that the current function accesses previous call frames. */
5280 void
5281 ix86_setup_frame_addresses (void)
5283 cfun->machine->accesses_prev_frame = 1;
5286 #ifndef USE_HIDDEN_LINKONCE
5287 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
5288 # define USE_HIDDEN_LINKONCE 1
5289 # else
5290 # define USE_HIDDEN_LINKONCE 0
5291 # endif
5292 #endif
5294 /* Label count for call and return thunks. It is used to make unique
5295 labels in call and return thunks. */
5296 static int indirectlabelno;
5298 /* True if call thunk function is needed. */
5299 static bool indirect_thunk_needed = false;
5301 /* Bit masks of integer registers, which contain branch target, used
5302 by call thunk functions. */
5303 static int indirect_thunks_used;
5305 /* True if return thunk function is needed. */
5306 static bool indirect_return_needed = false;
5308 /* True if return thunk function via CX is needed. */
5309 static bool indirect_return_via_cx;
5311 #ifndef INDIRECT_LABEL
5312 # define INDIRECT_LABEL "LIND"
5313 #endif
5315 /* Indicate what prefix is needed for an indirect branch. */
5316 enum indirect_thunk_prefix
5318 indirect_thunk_prefix_none,
5319 indirect_thunk_prefix_nt
5322 /* Return the prefix needed for an indirect branch INSN. */
5324 enum indirect_thunk_prefix
5325 indirect_thunk_need_prefix (rtx_insn *insn)
5327 enum indirect_thunk_prefix need_prefix;
5328 if ((cfun->machine->indirect_branch_type
5329 == indirect_branch_thunk_extern)
5330 && ix86_notrack_prefixed_insn_p (insn))
5332 /* NOTRACK prefix is only used with external thunk so that it
5333 can be properly updated to support CET at run-time. */
5334 need_prefix = indirect_thunk_prefix_nt;
5336 else
5337 need_prefix = indirect_thunk_prefix_none;
5338 return need_prefix;
5341 /* Fills in the label name that should be used for the indirect thunk. */
5343 static void
5344 indirect_thunk_name (char name[32], unsigned int regno,
5345 enum indirect_thunk_prefix need_prefix,
5346 bool ret_p)
5348 if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
5349 gcc_unreachable ();
5351 if (USE_HIDDEN_LINKONCE)
5353 const char *prefix;
5355 if (need_prefix == indirect_thunk_prefix_nt
5356 && regno != INVALID_REGNUM)
5358 /* NOTRACK prefix is only used with external thunk via
5359 register so that NOTRACK prefix can be added to indirect
5360 branch via register to support CET at run-time. */
5361 prefix = "_nt";
5363 else
5364 prefix = "";
5366 const char *ret = ret_p ? "return" : "indirect";
5368 if (regno != INVALID_REGNUM)
5370 const char *reg_prefix;
5371 if (LEGACY_INT_REGNO_P (regno))
5372 reg_prefix = TARGET_64BIT ? "r" : "e";
5373 else
5374 reg_prefix = "";
5375 sprintf (name, "__x86_%s_thunk%s_%s%s",
5376 ret, prefix, reg_prefix, reg_names[regno]);
5378 else
5379 sprintf (name, "__x86_%s_thunk%s", ret, prefix);
5381 else
5383 if (regno != INVALID_REGNUM)
5384 ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
5385 else
5387 if (ret_p)
5388 ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
5389 else
5390 ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
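/* Illustrative names generated above when USE_HIDDEN_LINKONCE is set
   (64-bit examples):

     __x86_indirect_thunk         branch target on the stack
     __x86_indirect_thunk_rax     branch target in %rax
     __x86_indirect_thunk_nt_rax  as above, for the external NOTRACK thunk
     __x86_return_thunk           return thunk  */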
5395 /* Output a call and return thunk for indirect branch. If REGNO != -1,
5396 the function address is in REGNO and the call and return thunk looks like:
5398 call L2
5400 pause
5401 lfence
5402 jmp L1
5404 mov %REG, (%sp)
5407 Otherwise, the function address is on the top of stack and the
5408 call and return thunk looks like:
5410 call L2
5412 pause
5413 lfence
5414 jmp L1
5416 lea WORD_SIZE(%sp), %sp
5420 static void
5421 output_indirect_thunk (unsigned int regno)
5423 char indirectlabel1[32];
5424 char indirectlabel2[32];
5426 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
5427 indirectlabelno++);
5428 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
5429 indirectlabelno++);
5431 /* Call */
5432 fputs ("\tcall\t", asm_out_file);
5433 assemble_name_raw (asm_out_file, indirectlabel2);
5434 fputc ('\n', asm_out_file);
5436 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
5438 /* AMD and Intel CPUs each prefer a different instruction as the loop filler.
5439 Using both pause + lfence is a compromise solution. */
5440 fprintf (asm_out_file, "\tpause\n\tlfence\n");
5442 /* Jump. */
5443 fputs ("\tjmp\t", asm_out_file);
5444 assemble_name_raw (asm_out_file, indirectlabel1);
5445 fputc ('\n', asm_out_file);
5447 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
5449 /* The above call insn pushed a word to stack. Adjust CFI info. */
5450 if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
5452 if (! dwarf2out_do_cfi_asm ())
5454 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
5455 xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
5456 xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
5457 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
5459 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
5460 xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
5461 xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
5462 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
5463 dwarf2out_emit_cfi (xcfi);
5466 if (regno != INVALID_REGNUM)
5468 /* MOV. */
5469 rtx xops[2];
5470 xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
5471 xops[1] = gen_rtx_REG (word_mode, regno);
5472 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
5474 else
5476 /* LEA. */
5477 rtx xops[2];
5478 xops[0] = stack_pointer_rtx;
5479 xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
5480 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
5483 fputs ("\tret\n", asm_out_file);
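/* Illustrative output of the routine above for a 64-bit thunk whose
   branch target is in %rax (label names are placeholders):

     __x86_indirect_thunk_rax:
             call    .LIND1
     .LIND0: pause
             lfence
             jmp     .LIND0
     .LIND1: mov     %rax, (%rsp)
             ret

   The call pushes a return address which the mov then overwrites with
   the real branch target, so the final ret transfers there without an
   indirect jmp/call that could be speculatively hijacked.  */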
5486 /* Output a function with a call and return thunk for indirect branch.
5487 If REGNO != INVALID_REGNUM, the function address is in REGNO.
5488 Otherwise, the function address is on the top of stack. Thunk is
5489 used for function return if RET_P is true. */
5491 static void
5492 output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
5493 unsigned int regno, bool ret_p)
5495 char name[32];
5496 tree decl;
5498 /* Create __x86_indirect_thunk. */
5499 indirect_thunk_name (name, regno, need_prefix, ret_p);
5500 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
5501 get_identifier (name),
5502 build_function_type_list (void_type_node, NULL_TREE));
5503 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
5504 NULL_TREE, void_type_node);
5505 TREE_PUBLIC (decl) = 1;
5506 TREE_STATIC (decl) = 1;
5507 DECL_IGNORED_P (decl) = 1;
5509 #if TARGET_MACHO
5510 if (TARGET_MACHO)
5512 switch_to_section (darwin_sections[picbase_thunk_section]);
5513 fputs ("\t.weak_definition\t", asm_out_file);
5514 assemble_name (asm_out_file, name);
5515 fputs ("\n\t.private_extern\t", asm_out_file);
5516 assemble_name (asm_out_file, name);
5517 putc ('\n', asm_out_file);
5518 ASM_OUTPUT_LABEL (asm_out_file, name);
5519 DECL_WEAK (decl) = 1;
5521 else
5522 #endif
5523 if (USE_HIDDEN_LINKONCE)
5525 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
5527 targetm.asm_out.unique_section (decl, 0);
5528 switch_to_section (get_named_section (decl, NULL, 0));
5530 targetm.asm_out.globalize_label (asm_out_file, name);
5531 fputs ("\t.hidden\t", asm_out_file);
5532 assemble_name (asm_out_file, name);
5533 putc ('\n', asm_out_file);
5534 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5536 else
5538 switch_to_section (text_section);
5539 ASM_OUTPUT_LABEL (asm_out_file, name);
5542 DECL_INITIAL (decl) = make_node (BLOCK);
5543 current_function_decl = decl;
5544 allocate_struct_function (decl, false);
5545 init_function_start (decl);
5546 /* We're about to hide the function body from callees of final_* by
5547 emitting it directly; tell them we're a thunk, if they care. */
5548 cfun->is_thunk = true;
5549 first_function_block_is_cold = false;
5550 /* Make sure unwind info is emitted for the thunk if needed. */
5551 final_start_function (emit_barrier (), asm_out_file, 1);
5553 output_indirect_thunk (regno);
5555 final_end_function ();
5556 init_insn_lengths ();
5557 free_after_compilation (cfun);
5558 set_cfun (NULL);
5559 current_function_decl = NULL;
5562 static int pic_labels_used;
5564 /* Fills in the label name that should be used for a pc thunk for
5565 the given register. */
5567 static void
5568 get_pc_thunk_name (char name[32], unsigned int regno)
5570 gcc_assert (!TARGET_64BIT);
5572 if (USE_HIDDEN_LINKONCE)
5573 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
5574 else
5575 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5579 /* This function generates code for -fpic that loads %ebx with
5580 the return address of the caller and then returns. */
5582 static void
5583 ix86_code_end (void)
5585 rtx xops[2];
5586 unsigned int regno;
5588 if (indirect_return_needed)
5589 output_indirect_thunk_function (indirect_thunk_prefix_none,
5590 INVALID_REGNUM, true);
5591 if (indirect_return_via_cx)
5592 output_indirect_thunk_function (indirect_thunk_prefix_none,
5593 CX_REG, true);
5594 if (indirect_thunk_needed)
5595 output_indirect_thunk_function (indirect_thunk_prefix_none,
5596 INVALID_REGNUM, false);
5598 for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
5600 unsigned int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1;
5601 if ((indirect_thunks_used & (1 << i)))
5602 output_indirect_thunk_function (indirect_thunk_prefix_none,
5603 regno, false);
5606 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
5608 char name[32];
5609 tree decl;
5611 if ((indirect_thunks_used & (1 << regno)))
5612 output_indirect_thunk_function (indirect_thunk_prefix_none,
5613 regno, false);
5615 if (!(pic_labels_used & (1 << regno)))
5616 continue;
5618 get_pc_thunk_name (name, regno);
5620 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
5621 get_identifier (name),
5622 build_function_type_list (void_type_node, NULL_TREE));
5623 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
5624 NULL_TREE, void_type_node);
5625 TREE_PUBLIC (decl) = 1;
5626 TREE_STATIC (decl) = 1;
5627 DECL_IGNORED_P (decl) = 1;
5629 #if TARGET_MACHO
5630 if (TARGET_MACHO)
5632 switch_to_section (darwin_sections[picbase_thunk_section]);
5633 fputs ("\t.weak_definition\t", asm_out_file);
5634 assemble_name (asm_out_file, name);
5635 fputs ("\n\t.private_extern\t", asm_out_file);
5636 assemble_name (asm_out_file, name);
5637 putc ('\n', asm_out_file);
5638 ASM_OUTPUT_LABEL (asm_out_file, name);
5639 DECL_WEAK (decl) = 1;
5641 else
5642 #endif
5643 if (USE_HIDDEN_LINKONCE)
5645 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
5647 targetm.asm_out.unique_section (decl, 0);
5648 switch_to_section (get_named_section (decl, NULL, 0));
5650 targetm.asm_out.globalize_label (asm_out_file, name);
5651 fputs ("\t.hidden\t", asm_out_file);
5652 assemble_name (asm_out_file, name);
5653 putc ('\n', asm_out_file);
5654 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5656 else
5658 switch_to_section (text_section);
5659 ASM_OUTPUT_LABEL (asm_out_file, name);
5662 DECL_INITIAL (decl) = make_node (BLOCK);
5663 current_function_decl = decl;
5664 allocate_struct_function (decl, false);
5665 init_function_start (decl);
5666 /* We're about to hide the function body from callees of final_* by
5667 emitting it directly; tell them we're a thunk, if they care. */
5668 cfun->is_thunk = true;
5669 first_function_block_is_cold = false;
5670 /* Make sure unwind info is emitted for the thunk if needed. */
5671 final_start_function (emit_barrier (), asm_out_file, 1);
5673 /* Pad stack IP move with 4 instructions (two NOPs count
5674 as one instruction). */
5675 if (TARGET_PAD_SHORT_FUNCTION)
5677 int i = 8;
5679 while (i--)
5680 fputs ("\tnop\n", asm_out_file);
5683 xops[0] = gen_rtx_REG (Pmode, regno);
5684 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
5685 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
5686 output_asm_insn ("%!ret", NULL);
5687 final_end_function ();
5688 init_insn_lengths ();
5689 free_after_compilation (cfun);
5690 set_cfun (NULL);
5691 current_function_decl = NULL;
5694 if (flag_split_stack)
5695 file_end_indicate_split_stack ();
5698 /* Emit code for the SET_GOT patterns. */
5700 const char *
5701 output_set_got (rtx dest, rtx label)
5703 rtx xops[3];
5705 xops[0] = dest;
5707 if (TARGET_VXWORKS_RTP && flag_pic)
5709 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5710 xops[2] = gen_rtx_MEM (Pmode,
5711 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5712 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5714 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5715 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5716 an unadorned address. */
5717 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5718 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5719 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5720 return "";
5723 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5725 if (flag_pic)
5727 char name[32];
5728 get_pc_thunk_name (name, REGNO (dest));
5729 pic_labels_used |= 1 << REGNO (dest);
5731 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5732 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5733 output_asm_insn ("%!call\t%X2", xops);
5735 #if TARGET_MACHO
5736 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
5737 This is what will be referenced by the Mach-O PIC subsystem. */
5738 if (machopic_should_output_picbase_label () || !label)
5739 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
5741 /* When we are restoring the pic base at the site of a nonlocal label,
5742 and we decided to emit the pic base above, we will still output a
5743 local label used for calculating the correction offset (even though
5744 the offset will be 0 in that case). */
5745 if (label)
5746 targetm.asm_out.internal_label (asm_out_file, "L",
5747 CODE_LABEL_NUMBER (label));
5748 #endif
5750 else
5752 if (TARGET_MACHO)
5753 /* We don't need a pic base; we're not producing pic. */
5754 gcc_unreachable ();
5756 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5757 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
5758 targetm.asm_out.internal_label (asm_out_file, "L",
5759 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5762 if (!TARGET_MACHO)
5763 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
5765 return "";
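/* Illustrative 32-bit -fpic expansion of the pattern above with the
   GOT pointer in %ebx:

             call    __x86.get_pc_thunk.bx
             addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   where the pc thunk emitted by ix86_code_end is simply

     __x86.get_pc_thunk.bx:
             movl    (%esp), %ebx
             ret  */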
5768 /* Generate a "push" pattern for input ARG. */
5771 gen_push (rtx arg)
5773 struct machine_function *m = cfun->machine;
5775 if (m->fs.cfa_reg == stack_pointer_rtx)
5776 m->fs.cfa_offset += UNITS_PER_WORD;
5777 m->fs.sp_offset += UNITS_PER_WORD;
5779 if (REG_P (arg) && GET_MODE (arg) != word_mode)
5780 arg = gen_rtx_REG (word_mode, REGNO (arg));
5782 return gen_rtx_SET (gen_rtx_MEM (word_mode,
5783 gen_rtx_PRE_DEC (Pmode,
5784 stack_pointer_rtx)),
5785 arg);
5788 /* Generate a "pop" pattern for input ARG. */
5791 gen_pop (rtx arg)
5793 if (REG_P (arg) && GET_MODE (arg) != word_mode)
5794 arg = gen_rtx_REG (word_mode, REGNO (arg));
5796 return gen_rtx_SET (arg,
5797 gen_rtx_MEM (word_mode,
5798 gen_rtx_POST_INC (Pmode,
5799 stack_pointer_rtx)));
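/* Illustrative RTL produced by the two helpers above in 64-bit mode
   for %rbx (hand-written here for exposition only):

     gen_push:  (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI bx))
     gen_pop:   (set (reg:DI bx) (mem:DI (post_inc:DI (reg:DI sp))))  */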
5802 /* Return >= 0 if there is an unused call-clobbered register available
5803 for the entire function. */
5805 static unsigned int
5806 ix86_select_alt_pic_regnum (void)
5808 if (ix86_use_pseudo_pic_reg ())
5809 return INVALID_REGNUM;
5811 if (crtl->is_leaf
5812 && !crtl->profile
5813 && !ix86_current_function_calls_tls_descriptor)
5815 int i, drap;
5816 /* Can't use the same register for both PIC and DRAP. */
5817 if (crtl->drap_reg)
5818 drap = REGNO (crtl->drap_reg);
5819 else
5820 drap = -1;
5821 for (i = 2; i >= 0; --i)
5822 if (i != drap && !df_regs_ever_live_p (i))
5823 return i;
5826 return INVALID_REGNUM;
5829 /* Return true if REGNO is used by the epilogue. */
5831 bool
5832 ix86_epilogue_uses (int regno)
5834 /* If there are no caller-saved registers, we preserve all registers,
5835 except for MMX and x87 registers which aren't supported when saving
5836 and restoring registers. Don't explicitly save SP register since
5837 it is always preserved. */
5838 return (epilogue_completed
5839 && cfun->machine->no_caller_saved_registers
5840 && !fixed_regs[regno]
5841 && !STACK_REGNO_P (regno)
5842 && !MMX_REGNO_P (regno));
5845 /* Return nonzero if register REGNO can be used as a scratch register
5846 in peephole2. */
5848 static bool
5849 ix86_hard_regno_scratch_ok (unsigned int regno)
5851 /* If there are no caller-saved registers, we can't use any register
5852 as a scratch register after epilogue and use REGNO as scratch
5853 register only if it has been used before to avoid saving and
5854 restoring it. */
5855 return (!cfun->machine->no_caller_saved_registers
5856 || (!epilogue_completed
5857 && df_regs_ever_live_p (regno)));
5860 /* Return TRUE if we need to save REGNO. */
5862 bool
5863 ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
5865 /* If there are no caller-saved registers, we preserve all registers,
5866 except for MMX and x87 registers which aren't supported when saving
5867 and restoring registers. Don't explicitly save SP register since
5868 it is always preserved. */
5869 if (cfun->machine->no_caller_saved_registers)
5871 /* Don't preserve registers used for function return value. */
5872 rtx reg = crtl->return_rtx;
5873 if (reg)
5875 unsigned int i = REGNO (reg);
5876 unsigned int nregs = REG_NREGS (reg);
5877 while (nregs-- > 0)
5878 if ((i + nregs) == regno)
5879 return false;
5882 return (df_regs_ever_live_p (regno)
5883 && !fixed_regs[regno]
5884 && !STACK_REGNO_P (regno)
5885 && !MMX_REGNO_P (regno)
5886 && (regno != HARD_FRAME_POINTER_REGNUM
5887 || !frame_pointer_needed));
5890 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
5891 && pic_offset_table_rtx)
5893 if (ix86_use_pseudo_pic_reg ())
5895 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
5896 _mcount in prologue. */
5897 if (!TARGET_64BIT && flag_pic && crtl->profile)
5898 return true;
5900 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5901 || crtl->profile
5902 || crtl->calls_eh_return
5903 || crtl->uses_const_pool
5904 || cfun->has_nonlocal_label)
5905 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
5908 if (crtl->calls_eh_return && maybe_eh_return)
5910 unsigned i;
5911 for (i = 0; ; i++)
5913 unsigned test = EH_RETURN_DATA_REGNO (i);
5914 if (test == INVALID_REGNUM)
5915 break;
5916 if (test == regno)
5917 return true;
5921 if (ignore_outlined && cfun->machine->call_ms2sysv)
5923 unsigned count = cfun->machine->call_ms2sysv_extra_regs
5924 + xlogue_layout::MIN_REGS;
5925 if (xlogue_layout::is_stub_managed_reg (regno, count))
5926 return false;
5929 if (crtl->drap_reg
5930 && regno == REGNO (crtl->drap_reg)
5931 && !cfun->machine->no_drap_save_restore)
5932 return true;
5934 return (df_regs_ever_live_p (regno)
5935 && !call_used_or_fixed_reg_p (regno)
5936 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5939 /* Return the number of saved general purpose registers. */
5941 static int
5942 ix86_nsaved_regs (void)
5944 int nregs = 0;
5945 int regno;
5947 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5948 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
5949 nregs ++;
5950 return nregs;
5953 /* Return number of saved SSE registers. */
5955 static int
5956 ix86_nsaved_sseregs (void)
5958 int nregs = 0;
5959 int regno;
5961 if (!TARGET_64BIT_MS_ABI)
5962 return 0;
5963 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5964 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
5965 nregs ++;
5966 return nregs;
5969 /* Given FROM and TO register numbers, say whether this elimination is
5970 allowed. If stack alignment is needed, we can only replace argument
5971 pointer with hard frame pointer, or replace frame pointer with stack
5972 pointer. Otherwise, frame pointer elimination is automatically
5973 handled and all other eliminations are valid. */
5975 static bool
5976 ix86_can_eliminate (const int from, const int to)
5978 if (stack_realign_fp)
5979 return ((from == ARG_POINTER_REGNUM
5980 && to == HARD_FRAME_POINTER_REGNUM)
5981 || (from == FRAME_POINTER_REGNUM
5982 && to == STACK_POINTER_REGNUM));
5983 else
5984 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
5987 /* Return the offset between two registers, one to be eliminated, and the other
5988 its replacement, at the start of a routine. */
5990 HOST_WIDE_INT
5991 ix86_initial_elimination_offset (int from, int to)
5993 struct ix86_frame &frame = cfun->machine->frame;
5995 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5996 return frame.hard_frame_pointer_offset;
5997 else if (from == FRAME_POINTER_REGNUM
5998 && to == HARD_FRAME_POINTER_REGNUM)
5999 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6000 else
6002 gcc_assert (to == STACK_POINTER_REGNUM);
6004 if (from == ARG_POINTER_REGNUM)
6005 return frame.stack_pointer_offset;
6007 gcc_assert (from == FRAME_POINTER_REGNUM);
6008 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6012 /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
6013 void warn_once_call_ms2sysv_xlogues (const char *feature)
6015 static bool warned_once = false;
6016 if (!warned_once)
6018 warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6019 feature);
6020 warned_once = true;
6024 /* Return the probing interval for -fstack-clash-protection. */
6026 static HOST_WIDE_INT
6027 get_probe_interval (void)
6029 if (flag_stack_clash_protection)
6030 return (HOST_WIDE_INT_1U
6031 << param_stack_clash_protection_probe_interval);
6032 else
6033 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
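/* Illustration (added for clarity; the concrete values are an assumption,
   namely the documented defaults of STACK_CHECK_PROBE_INTERVAL_EXP == 12 and
   --param stack-clash-protection-probe-interval=12): either branch then
   returns HOST_WIDE_INT_1U << 12 == 4096, i.e. one probe per 4 KiB page.  */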
6036 /* When using -fsplit-stack, the allocation routines set a field in
6037 the TCB to the bottom of the stack plus this much space, measured
6038 in bytes. */
6040 #define SPLIT_STACK_AVAILABLE 256
6042 /* Fill structure ix86_frame about frame of currently computed function. */
6044 static void
6045 ix86_compute_frame_layout (void)
6047 struct ix86_frame *frame = &cfun->machine->frame;
6048 struct machine_function *m = cfun->machine;
6049 unsigned HOST_WIDE_INT stack_alignment_needed;
6050 HOST_WIDE_INT offset;
6051 unsigned HOST_WIDE_INT preferred_alignment;
6052 HOST_WIDE_INT size = ix86_get_frame_size ();
6053 HOST_WIDE_INT to_allocate;
6055 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
6056 * ms_abi functions that call a sysv function. We now need to prune away
6057 * cases where it should be disabled. */
6058 if (TARGET_64BIT && m->call_ms2sysv)
6060 gcc_assert (TARGET_64BIT_MS_ABI);
6061 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
6062 gcc_assert (!TARGET_SEH);
6063 gcc_assert (TARGET_SSE);
6064 gcc_assert (!ix86_using_red_zone ());
6066 if (crtl->calls_eh_return)
6068 gcc_assert (!reload_completed);
6069 m->call_ms2sysv = false;
6070 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
6073 else if (ix86_static_chain_on_stack)
6075 gcc_assert (!reload_completed);
6076 m->call_ms2sysv = false;
6077 warn_once_call_ms2sysv_xlogues ("static call chains");
6080 /* Finally, compute which registers the stub will manage. */
6081 else
6083 unsigned count = xlogue_layout::count_stub_managed_regs ();
6084 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
6085 m->call_ms2sysv_pad_in = 0;
6089 frame->nregs = ix86_nsaved_regs ();
6090 frame->nsseregs = ix86_nsaved_sseregs ();
6092 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
6093 except for function prologues, leaf functions, and when the default
6094 incoming stack boundary is overridden at the command line or via the
6095 force_align_arg_pointer attribute.
6097 Darwin's ABI specifies 128-bit alignment for both the 32- and 64-bit
6098 variants at call sites, including profile function calls.
6100 if (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
6101 && crtl->preferred_stack_boundary < 128)
6102 && (!crtl->is_leaf || cfun->calls_alloca != 0
6103 || ix86_current_function_calls_tls_descriptor
6104 || (TARGET_MACHO && crtl->profile)
6105 || ix86_incoming_stack_boundary < 128))
6107 crtl->preferred_stack_boundary = 128;
6108 crtl->stack_alignment_needed = 128;
6111 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6112 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6114 gcc_assert (!size || stack_alignment_needed);
6115 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6116 gcc_assert (preferred_alignment <= stack_alignment_needed);
6118 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
6119 gcc_assert (TARGET_64BIT || !frame->nsseregs);
6120 if (TARGET_64BIT && m->call_ms2sysv)
6122 gcc_assert (stack_alignment_needed >= 16);
6123 gcc_assert (!frame->nsseregs);
6126 /* For SEH we have to limit the amount of code movement into the prologue.
6127 At present we do this via a BLOCKAGE, at which point there's very little
6128 scheduling that can be done, which means that there's very little point
6129 in doing anything except PUSHs. */
6130 if (TARGET_SEH)
6131 m->use_fast_prologue_epilogue = false;
6132 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
6134 int count = frame->nregs;
6135 struct cgraph_node *node = cgraph_node::get (current_function_decl);
6137 /* The fast prologue uses move instead of push to save registers. This
6138 is significantly longer, but also executes faster as modern hardware
6139 can execute the moves in parallel, but can't do that for push/pop.
6141 Be careful about choosing what prologue to emit: when the function takes
6142 many instructions to execute we may use the slow version, as well as
6143 when the function is known to be outside a hot spot (this is known with
6144 feedback only). Weight the size of the function by the number of registers
6145 to save, as it is cheap to use one or two push instructions but very
6146 slow to use many of them.
6148 Calling this hook multiple times with the same frame requirements
6149 must produce the same layout, since the RA might otherwise be
6150 unable to reach a fixed point or might fail its final sanity checks.
6151 This means that once we've assumed that a function does or doesn't
6152 have a particular size, we have to stick to that assumption
6153 regardless of how the function has changed since. */
6154 if (count)
6155 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6156 if (node->frequency < NODE_FREQUENCY_NORMAL
6157 || (flag_branch_probabilities
6158 && node->frequency < NODE_FREQUENCY_HOT))
6159 m->use_fast_prologue_epilogue = false;
6160 else
6162 if (count != frame->expensive_count)
6164 frame->expensive_count = count;
6165 frame->expensive_p = expensive_function_p (count);
6167 m->use_fast_prologue_epilogue = !frame->expensive_p;
6171 frame->save_regs_using_mov
6172 = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
6174 /* Skip return address and error code in exception handler. */
6175 offset = INCOMING_FRAME_SP_OFFSET;
6177 /* Skip pushed static chain. */
6178 if (ix86_static_chain_on_stack)
6179 offset += UNITS_PER_WORD;
6181 /* Skip saved base pointer. */
6182 if (frame_pointer_needed)
6183 offset += UNITS_PER_WORD;
6184 frame->hfp_save_offset = offset;
6186 /* The traditional frame pointer location is at the top of the frame. */
6187 frame->hard_frame_pointer_offset = offset;
6189 /* Register save area */
6190 offset += frame->nregs * UNITS_PER_WORD;
6191 frame->reg_save_offset = offset;
6193 /* On SEH target, registers are pushed just before the frame pointer
6194 location. */
6195 if (TARGET_SEH)
6196 frame->hard_frame_pointer_offset = offset;
6198 /* Calculate the size of the va-arg area (not including padding, if any). */
6199 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
6201 /* Also adjust stack_realign_offset for the largest alignment of
6202 stack slot actually used. */
6203 if (stack_realign_fp
6204 || (cfun->machine->max_used_stack_alignment != 0
6205 && (offset % cfun->machine->max_used_stack_alignment) != 0))
6207 /* We may need a 16-byte aligned stack for the remainder of the
6208 register save area, but the stack frame for the local function
6209 may require a greater alignment if using AVX/AVX2/AVX-512. In order
6210 to avoid wasting space, we first calculate the space needed for
6211 the rest of the register saves, add that to the stack pointer,
6212 and then realign the stack to the boundary of the start of the
6213 frame for the local function. */
6214 HOST_WIDE_INT space_needed = 0;
6215 HOST_WIDE_INT sse_reg_space_needed = 0;
6217 if (TARGET_64BIT)
6219 if (m->call_ms2sysv)
6221 m->call_ms2sysv_pad_in = 0;
6222 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
6225 else if (frame->nsseregs)
6226 /* The only ABI that has saved SSE registers (Win64) also has a
6227 16-byte aligned default stack. However, many programs violate
6228 the ABI, and Wine64 forces stack realignment to compensate. */
6229 space_needed = frame->nsseregs * 16;
6231 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
6233 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
6234 round up anyway, to be pedantic. */
6235 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
6237 else
6238 space_needed = frame->va_arg_size;
6240 /* Record the allocation size required prior to the realignment AND. */
6241 frame->stack_realign_allocate = space_needed;
6243 /* The re-aligned stack starts at frame->stack_realign_offset. Values
6244 before this point are not directly comparable with values below
6245 this point. Use sp_valid_at to determine if the stack pointer is
6246 valid for a given offset, fp_valid_at for the frame pointer, or
6247 choose_baseaddr to have a base register chosen for you.
6249 Note that the result of (frame->stack_realign_offset
6250 & (stack_alignment_needed - 1)) may not equal zero. */
6251 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
6252 frame->stack_realign_offset = offset - space_needed;
6253 frame->sse_reg_save_offset = frame->stack_realign_offset
6254 + sse_reg_space_needed;
6256 else
6258 frame->stack_realign_offset = offset;
6260 if (TARGET_64BIT && m->call_ms2sysv)
6262 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
6263 offset += xlogue_layout::get_instance ().get_stack_space_used ();
6266 /* Align and set SSE register save area. */
6267 else if (frame->nsseregs)
6269 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
6270 required and the DRAP re-alignment boundary is at least 16 bytes,
6271 then we want the SSE register save area properly aligned. */
6272 if (ix86_incoming_stack_boundary >= 128
6273 || (stack_realign_drap && stack_alignment_needed >= 16))
6274 offset = ROUND_UP (offset, 16);
6275 offset += frame->nsseregs * 16;
6277 frame->sse_reg_save_offset = offset;
6278 offset += frame->va_arg_size;
6281 /* Align start of frame for local function. When a function call
6282 is removed, it may become a leaf function. But if arguments may
6283 be passed on the stack, we need to align the stack when there is no
6284 tail call. */
6285 if (m->call_ms2sysv
6286 || frame->va_arg_size != 0
6287 || size != 0
6288 || !crtl->is_leaf
6289 || (!crtl->tail_call_emit
6290 && cfun->machine->outgoing_args_on_stack)
6291 || cfun->calls_alloca
6292 || ix86_current_function_calls_tls_descriptor)
6293 offset = ROUND_UP (offset, stack_alignment_needed);
6295 /* Frame pointer points here. */
6296 frame->frame_pointer_offset = offset;
6298 offset += size;
6300 /* Add outgoing arguments area. Can be skipped if we eliminated
6301 all the function calls as dead code.
6302 Skipping is however impossible when the function calls alloca. The alloca
6303 expander assumes that the last crtl->outgoing_args_size bytes
6304 of the stack frame are unused. */
6305 if (ACCUMULATE_OUTGOING_ARGS
6306 && (!crtl->is_leaf || cfun->calls_alloca
6307 || ix86_current_function_calls_tls_descriptor))
6309 offset += crtl->outgoing_args_size;
6310 frame->outgoing_arguments_size = crtl->outgoing_args_size;
6312 else
6313 frame->outgoing_arguments_size = 0;
6315 /* Align stack boundary. Only needed if we're calling another function
6316 or using alloca. */
6317 if (!crtl->is_leaf || cfun->calls_alloca
6318 || ix86_current_function_calls_tls_descriptor)
6319 offset = ROUND_UP (offset, preferred_alignment);
6321 /* We've reached end of stack frame. */
6322 frame->stack_pointer_offset = offset;
6324 /* Size prologue needs to allocate. */
6325 to_allocate = offset - frame->sse_reg_save_offset;
6327 if ((!to_allocate && frame->nregs <= 1)
6328 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
6329 /* If static stack checking is enabled and done with probes,
6330 the registers need to be saved before allocating the frame. */
6331 || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6332 /* If stack clash probing needs a loop, then it needs a
6333 scratch register. But the returned register is only guaranteed
6334 to be safe to use after register saves are complete. So if
6335 stack clash protections are enabled and the allocated frame is
6336 larger than the probe interval, then use pushes to save
6337 callee saved registers. */
6338 || (flag_stack_clash_protection && to_allocate > get_probe_interval ()))
6339 frame->save_regs_using_mov = false;
6341 if (ix86_using_red_zone ()
6342 && crtl->sp_is_unchanging
6343 && crtl->is_leaf
6344 && !ix86_pc_thunk_call_expanded
6345 && !ix86_current_function_calls_tls_descriptor)
6347 frame->red_zone_size = to_allocate;
6348 if (frame->save_regs_using_mov)
6349 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6350 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6351 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6353 else
6354 frame->red_zone_size = 0;
6355 frame->stack_pointer_offset -= frame->red_zone_size;
6357 /* The SEH frame pointer location is near the bottom of the frame.
6358 This is enforced by the fact that the difference between the
6359 stack pointer and the frame pointer is limited to 240 bytes in
6360 the unwind data structure. */
6361 if (TARGET_SEH)
6363 HOST_WIDE_INT diff;
6365 /* If we can leave the frame pointer where it is, do so. Also, returns
6366 the establisher frame for __builtin_frame_address (0). */
6367 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
6368 if (diff <= SEH_MAX_FRAME_SIZE
6369 && (diff > 240 || (diff & 15) != 0)
6370 && !crtl->accesses_prior_frames)
6372 /* Ideally we'd determine what portion of the local stack frame
6373 (within the constraint of the lowest 240) is most heavily used.
6374 But without that complication, simply bias the frame pointer
6375 by 128 bytes so as to maximize the amount of the local stack
6376 frame that is addressable with 8-bit offsets. */
6377 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
6382 /* This is semi-inlined memory_address_length, but simplified
6383 since we know that we're always dealing with reg+offset, and
6384 to avoid having to create and discard all that rtl. */
6386 static inline int
6387 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
6389 int len = 4;
6391 if (offset == 0)
6393 /* EBP and R13 cannot be encoded without an offset. */
6394 len = (regno == BP_REG || regno == R13_REG);
6396 else if (IN_RANGE (offset, -128, 127))
6397 len = 1;
6399 /* ESP and R12 must be encoded with a SIB byte. */
6400 if (regno == SP_REG || regno == R12_REG)
6401 len++;
6403 return len;
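/* Worked examples for the helper above (a reading aid derived from the
   standard x86 ModRM/SIB encoding rules, not part of the original source):
     choose_baseaddr_len (AX_REG, 0)     -> 0   no displacement, no SIB
     choose_baseaddr_len (BP_REG, 0)     -> 1   rBP/r13 base forces a disp8
     choose_baseaddr_len (SP_REG, 8)     -> 2   disp8 plus the mandatory SIB
     choose_baseaddr_len (AX_REG, 1024)  -> 4   disp32  */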
6406 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
6407 the frame save area. The register is saved at CFA - CFA_OFFSET. */
6409 static bool
6410 sp_valid_at (HOST_WIDE_INT cfa_offset)
6412 const struct machine_frame_state &fs = cfun->machine->fs;
6413 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
6415 /* Validate that the cfa_offset isn't in a "no-man's land". */
6416 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
6417 return false;
6419 return fs.sp_valid;
6422 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
6423 the frame save area. The register is saved at CFA - CFA_OFFSET. */
6425 static inline bool
6426 fp_valid_at (HOST_WIDE_INT cfa_offset)
6428 const struct machine_frame_state &fs = cfun->machine->fs;
6429 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
6431 /* Validate that the cfa_offset isn't in a "no-man's land". */
6432 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
6433 return false;
6435 return fs.fp_valid;
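/* Reading aid for the two predicates above (a summary, not new behavior):
   once fs.sp_realigned is set, the stack pointer only reaches CFA offsets
   greater than fs.sp_realigned_offset, the frame pointer only reaches
   offsets up to fs.sp_realigned_fp_last, and the asserts flag anything in
   between as a "no-man's land" reachable through neither register.  */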
6438 /* Choose a base register based upon alignment requested, speed and/or
6439 size. */
6441 static void
6442 choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
6443 HOST_WIDE_INT &base_offset,
6444 unsigned int align_reqested, unsigned int *align)
6446 const struct machine_function *m = cfun->machine;
6447 unsigned int hfp_align;
6448 unsigned int drap_align;
6449 unsigned int sp_align;
6450 bool hfp_ok = fp_valid_at (cfa_offset);
6451 bool drap_ok = m->fs.drap_valid;
6452 bool sp_ok = sp_valid_at (cfa_offset);
6454 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
6456 /* Filter out any registers that don't meet the requested alignment
6457 criteria. */
6458 if (align_reqested)
6460 if (m->fs.realigned)
6461 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
6462 /* SEH unwind code does not currently support REG_CFA_EXPRESSION
6463 notes (which we would need in order to use a realigned stack pointer),
6464 so disable this on SEH targets. */
6465 else if (m->fs.sp_realigned)
6466 sp_align = crtl->stack_alignment_needed;
6468 hfp_ok = hfp_ok && hfp_align >= align_reqested;
6469 drap_ok = drap_ok && drap_align >= align_reqested;
6470 sp_ok = sp_ok && sp_align >= align_reqested;
6473 if (m->use_fast_prologue_epilogue)
6475 /* Choose the base register most likely to allow the most scheduling
6476 opportunities. Generally FP is valid throughout the function,
6477 while DRAP must be reloaded within the epilogue. But choose either
6478 over the SP due to increased encoding size. */
6480 if (hfp_ok)
6482 base_reg = hard_frame_pointer_rtx;
6483 base_offset = m->fs.fp_offset - cfa_offset;
6485 else if (drap_ok)
6487 base_reg = crtl->drap_reg;
6488 base_offset = 0 - cfa_offset;
6490 else if (sp_ok)
6492 base_reg = stack_pointer_rtx;
6493 base_offset = m->fs.sp_offset - cfa_offset;
6496 else
6498 HOST_WIDE_INT toffset;
6499 int len = 16, tlen;
6501 /* Choose the base register with the smallest address encoding.
6502 With a tie, choose FP > DRAP > SP. */
6503 if (sp_ok)
6505 base_reg = stack_pointer_rtx;
6506 base_offset = m->fs.sp_offset - cfa_offset;
6507 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
6509 if (drap_ok)
6511 toffset = 0 - cfa_offset;
6512 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
6513 if (tlen <= len)
6515 base_reg = crtl->drap_reg;
6516 base_offset = toffset;
6517 len = tlen;
6520 if (hfp_ok)
6522 toffset = m->fs.fp_offset - cfa_offset;
6523 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
6524 if (tlen <= len)
6526 base_reg = hard_frame_pointer_rtx;
6527 base_offset = toffset;
6532 /* Set the align return value. */
6533 if (align)
6535 if (base_reg == stack_pointer_rtx)
6536 *align = sp_align;
6537 else if (base_reg == crtl->drap_reg)
6538 *align = drap_align;
6539 else if (base_reg == hard_frame_pointer_rtx)
6540 *align = hfp_align;
6544 /* Return an RTX that points to CFA_OFFSET within the stack frame and
6545 the alignment of address. If ALIGN is non-null, it should point to
6546 an alignment value (in bits) that is preferred or zero and will
6547 receive the alignment of the base register that was selected,
6548 irrespective of whether or not CFA_OFFSET is a multiple of that
6549 alignment value. If it is possible for the base register offset to be
6550 non-immediate then SCRATCH_REGNO should specify a scratch register to
6551 use.
6553 The valid base registers are taken from CFUN->MACHINE->FS. */
6555 static rtx
6556 choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
6557 unsigned int scratch_regno = INVALID_REGNUM)
6559 rtx base_reg = NULL;
6560 HOST_WIDE_INT base_offset = 0;
6562 /* If a specific alignment is requested, try to get a base register
6563 with that alignment first. */
6564 if (align && *align)
6565 choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
6567 if (!base_reg)
6568 choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
6570 gcc_assert (base_reg != NULL);
6572 rtx base_offset_rtx = GEN_INT (base_offset);
6574 if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
6576 gcc_assert (scratch_regno != INVALID_REGNUM);
6578 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
6579 emit_move_insn (scratch_reg, base_offset_rtx);
6581 return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
6584 return plus_constant (Pmode, base_reg, base_offset);
6587 /* Emit code to save registers in the prologue. */
6589 static void
6590 ix86_emit_save_regs (void)
6592 unsigned int regno;
6593 rtx_insn *insn;
6595 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
6596 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6598 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
6599 RTX_FRAME_RELATED_P (insn) = 1;
6603 /* Emit a single register save at CFA - CFA_OFFSET. */
6605 static void
6606 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
6607 HOST_WIDE_INT cfa_offset)
6609 struct machine_function *m = cfun->machine;
6610 rtx reg = gen_rtx_REG (mode, regno);
6611 rtx mem, addr, base, insn;
6612 unsigned int align = GET_MODE_ALIGNMENT (mode);
6614 addr = choose_baseaddr (cfa_offset, &align);
6615 mem = gen_frame_mem (mode, addr);
6617 /* The location alignment depends upon the base register. */
6618 align = MIN (GET_MODE_ALIGNMENT (mode), align);
6619 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
6620 set_mem_align (mem, align);
6622 insn = emit_insn (gen_rtx_SET (mem, reg));
6623 RTX_FRAME_RELATED_P (insn) = 1;
6625 base = addr;
6626 if (GET_CODE (base) == PLUS)
6627 base = XEXP (base, 0);
6628 gcc_checking_assert (REG_P (base));
6630 /* When saving registers into a re-aligned local stack frame, avoid
6631 any tricky guessing by dwarf2out. */
6632 if (m->fs.realigned)
6634 gcc_checking_assert (stack_realign_drap);
6636 if (regno == REGNO (crtl->drap_reg))
6638 /* A bit of a hack. We force the DRAP register to be saved in
6639 the re-aligned stack frame, which provides us with a copy
6640 of the CFA that will last past the prologue. Install it. */
6641 gcc_checking_assert (cfun->machine->fs.fp_valid);
6642 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
6643 cfun->machine->fs.fp_offset - cfa_offset);
6644 mem = gen_rtx_MEM (mode, addr);
6645 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
6647 else
6649 /* The frame pointer is a stable reference within the
6650 aligned frame. Use it. */
6651 gcc_checking_assert (cfun->machine->fs.fp_valid);
6652 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
6653 cfun->machine->fs.fp_offset - cfa_offset);
6654 mem = gen_rtx_MEM (mode, addr);
6655 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
6659 else if (base == stack_pointer_rtx && m->fs.sp_realigned
6660 && cfa_offset >= m->fs.sp_realigned_offset)
6662 gcc_checking_assert (stack_realign_fp);
6663 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
6666 /* The memory may not be relative to the current CFA register,
6667 which means that we may need to generate a new pattern for
6668 use by the unwind info. */
6669 else if (base != m->fs.cfa_reg)
6671 addr = plus_constant (Pmode, m->fs.cfa_reg,
6672 m->fs.cfa_offset - cfa_offset);
6673 mem = gen_rtx_MEM (mode, addr);
6674 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
6678 /* Emit code to save registers using MOV insns.
6679 First register is stored at CFA - CFA_OFFSET. */
6680 static void
6681 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
6683 unsigned int regno;
6685 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6686 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6688 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
6689 cfa_offset -= UNITS_PER_WORD;
6693 /* Emit code to save SSE registers using MOV insns.
6694 First register is stored at CFA - CFA_OFFSET. */
6695 static void
6696 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
6698 unsigned int regno;
6700 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6701 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6703 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
6704 cfa_offset -= GET_MODE_SIZE (V4SFmode);
6708 static GTY(()) rtx queued_cfa_restores;
6710 /* Add a REG_CFA_RESTORE REG note to INSN or queue it until the next stack
6711 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
6712 Don't add the note if the previously saved value will be left untouched
6713 within stack red-zone till return, as unwinders can find the same value
6714 in the register and on the stack. */
6716 static void
6717 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
6719 if (!crtl->shrink_wrapped
6720 && cfa_offset <= cfun->machine->fs.red_zone_offset)
6721 return;
6723 if (insn)
6725 add_reg_note (insn, REG_CFA_RESTORE, reg);
6726 RTX_FRAME_RELATED_P (insn) = 1;
6728 else
6729 queued_cfa_restores
6730 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
6733 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
6735 static void
6736 ix86_add_queued_cfa_restore_notes (rtx insn)
6738 rtx last;
6739 if (!queued_cfa_restores)
6740 return;
6741 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
6743 XEXP (last, 1) = REG_NOTES (insn);
6744 REG_NOTES (insn) = queued_cfa_restores;
6745 queued_cfa_restores = NULL_RTX;
6746 RTX_FRAME_RELATED_P (insn) = 1;
6749 /* Expand prologue or epilogue stack adjustment.
6750 The pattern exists to put a dependency on all ebp-based memory accesses.
6751 STYLE should be negative if instructions should be marked as frame related,
6752 zero if %r11 register is live and cannot be freely used and positive
6753 otherwise. */
6755 static rtx
6756 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
6757 int style, bool set_cfa)
6759 struct machine_function *m = cfun->machine;
6760 rtx addend = offset;
6761 rtx insn;
6762 bool add_frame_related_expr = false;
6764 if (!x86_64_immediate_operand (offset, Pmode))
6766 /* r11 is used by indirect sibcall return as well, set before the
6767 epilogue and used after the epilogue. */
6768 if (style)
6769 addend = gen_rtx_REG (Pmode, R11_REG);
6770 else
6772 gcc_assert (src != hard_frame_pointer_rtx
6773 && dest != hard_frame_pointer_rtx);
6774 addend = hard_frame_pointer_rtx;
6776 emit_insn (gen_rtx_SET (addend, offset));
6777 if (style < 0)
6778 add_frame_related_expr = true;
6781 insn = emit_insn (gen_pro_epilogue_adjust_stack_add
6782 (Pmode, dest, src, addend));
6783 if (style >= 0)
6784 ix86_add_queued_cfa_restore_notes (insn);
6786 if (set_cfa)
6788 rtx r;
6790 gcc_assert (m->fs.cfa_reg == src);
6791 m->fs.cfa_offset += INTVAL (offset);
6792 m->fs.cfa_reg = dest;
6794 r = gen_rtx_PLUS (Pmode, src, offset);
6795 r = gen_rtx_SET (dest, r);
6796 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
6797 RTX_FRAME_RELATED_P (insn) = 1;
6799 else if (style < 0)
6801 RTX_FRAME_RELATED_P (insn) = 1;
6802 if (add_frame_related_expr)
6804 rtx r = gen_rtx_PLUS (Pmode, src, offset);
6805 r = gen_rtx_SET (dest, r);
6806 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
6810 if (dest == stack_pointer_rtx)
6812 HOST_WIDE_INT ooffset = m->fs.sp_offset;
6813 bool valid = m->fs.sp_valid;
6814 bool realigned = m->fs.sp_realigned;
6816 if (src == hard_frame_pointer_rtx)
6818 valid = m->fs.fp_valid;
6819 realigned = false;
6820 ooffset = m->fs.fp_offset;
6822 else if (src == crtl->drap_reg)
6824 valid = m->fs.drap_valid;
6825 realigned = false;
6826 ooffset = 0;
6828 else
6830 /* Else there are two possibilities: SP itself, which we set
6831 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
6832 taken care of by hand along the eh_return path. */
6833 gcc_checking_assert (src == stack_pointer_rtx
6834 || offset == const0_rtx);
6837 m->fs.sp_offset = ooffset - INTVAL (offset);
6838 m->fs.sp_valid = valid;
6839 m->fs.sp_realigned = realigned;
6841 return insn;
6844 /* Find an available register to be used as dynamic realign argument
6845 pointer register. Such a register will be written in the prologue and
6846 used at the beginning of the body, so it must not be
6847 1. parameter passing register.
6848 2. GOT pointer.
6849 We reuse static-chain register if it is available. Otherwise, we
6850 use DI for i386 and R13 for x86-64. We chose R13 since it has
6851 shorter encoding.
6853 Return: the regno of chosen register. */
6855 static unsigned int
6856 find_drap_reg (void)
6858 tree decl = cfun->decl;
6860 /* Always use callee-saved register if there are no caller-saved
6861 registers. */
6862 if (TARGET_64BIT)
6864 /* Use R13 for a nested function or a function that needs a static chain.
6865 Since function with tail call may use any caller-saved
6866 registers in epilogue, DRAP must not use caller-saved
6867 register in such case. */
6868 if (DECL_STATIC_CHAIN (decl)
6869 || cfun->machine->no_caller_saved_registers
6870 || crtl->tail_call_emit)
6871 return R13_REG;
6873 return R10_REG;
6875 else
6877 /* Use DI for a nested function or a function that needs a static chain.
6878 Since function with tail call may use any caller-saved
6879 registers in epilogue, DRAP must not use caller-saved
6880 register in such case. */
6881 if (DECL_STATIC_CHAIN (decl)
6882 || cfun->machine->no_caller_saved_registers
6883 || crtl->tail_call_emit)
6884 return DI_REG;
6886 /* Reuse static chain register if it isn't used for parameter
6887 passing. */
6888 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
6890 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
6891 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
6892 return CX_REG;
6894 return DI_REG;
6898 /* Return minimum incoming stack alignment. */
6900 static unsigned int
6901 ix86_minimum_incoming_stack_boundary (bool sibcall)
6903 unsigned int incoming_stack_boundary;
6905 /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
6906 if (cfun->machine->func_type != TYPE_NORMAL)
6907 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
6908 /* Prefer the one specified at command line. */
6909 else if (ix86_user_incoming_stack_boundary)
6910 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
6911 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
6912 if -mstackrealign is used, this isn't a sibcall check, and the
6913 estimated stack alignment is 128 bits. */
6914 else if (!sibcall
6915 && ix86_force_align_arg_pointer
6916 && crtl->stack_alignment_estimated == 128)
6917 incoming_stack_boundary = MIN_STACK_BOUNDARY;
6918 else
6919 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
6921 /* Incoming stack alignment can be changed on individual functions
6922 via force_align_arg_pointer attribute. We use the smallest
6923 incoming stack boundary. */
6924 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
6925 && lookup_attribute ("force_align_arg_pointer",
6926 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
6927 incoming_stack_boundary = MIN_STACK_BOUNDARY;
6929 /* The incoming stack frame has to be aligned at least at
6930 parm_stack_boundary. */
6931 if (incoming_stack_boundary < crtl->parm_stack_boundary)
6932 incoming_stack_boundary = crtl->parm_stack_boundary;
6934 /* The stack at the entry of main is aligned by the runtime. We use the
6935 smallest incoming stack boundary. */
6936 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
6937 && DECL_NAME (current_function_decl)
6938 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6939 && DECL_FILE_SCOPE_P (current_function_decl))
6940 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
6942 return incoming_stack_boundary;
6945 /* Update incoming stack boundary and estimated stack alignment. */
6947 static void
6948 ix86_update_stack_boundary (void)
6950 ix86_incoming_stack_boundary
6951 = ix86_minimum_incoming_stack_boundary (false);
6953 /* x86_64 vararg needs 16byte stack alignment for register save area. */
6954 if (TARGET_64BIT
6955 && cfun->stdarg
6956 && crtl->stack_alignment_estimated < 128)
6957 crtl->stack_alignment_estimated = 128;
6959 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
6960 if (ix86_tls_descriptor_calls_expanded_in_cfun
6961 && crtl->preferred_stack_boundary < 128)
6962 crtl->preferred_stack_boundary = 128;
6965 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
6966 needed or an rtx for DRAP otherwise. */
6968 static rtx
6969 ix86_get_drap_rtx (void)
6971 /* We must use DRAP if there are outgoing arguments on stack and
6972 ACCUMULATE_OUTGOING_ARGS is false. */
6973 if (ix86_force_drap
6974 || (cfun->machine->outgoing_args_on_stack
6975 && !ACCUMULATE_OUTGOING_ARGS))
6976 crtl->need_drap = true;
6978 if (stack_realign_drap)
6980 /* Assign DRAP to vDRAP and return vDRAP. */
6981 unsigned int regno = find_drap_reg ();
6982 rtx drap_vreg;
6983 rtx arg_ptr;
6984 rtx_insn *seq, *insn;
6986 arg_ptr = gen_rtx_REG (Pmode, regno);
6987 crtl->drap_reg = arg_ptr;
6989 start_sequence ();
6990 drap_vreg = copy_to_reg (arg_ptr);
6991 seq = get_insns ();
6992 end_sequence ();
6994 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
6995 if (!optimize)
6997 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
6998 RTX_FRAME_RELATED_P (insn) = 1;
7000 return drap_vreg;
7002 else
7003 return NULL;
7006 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7008 static rtx
7009 ix86_internal_arg_pointer (void)
7011 return virtual_incoming_args_rtx;
7014 struct scratch_reg {
7015 rtx reg;
7016 bool saved;
7019 /* Return a short-lived scratch register for use on function entry.
7020 In 32-bit mode, it is valid only after the registers are saved
7021 in the prologue. This register must be released by means of
7022 release_scratch_register_on_entry once it is dead. */
7024 static void
7025 get_scratch_register_on_entry (struct scratch_reg *sr)
7027 int regno;
7029 sr->saved = false;
7031 if (TARGET_64BIT)
7033 /* We always use R11 in 64-bit mode. */
7034 regno = R11_REG;
7036 else
7038 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
7039 bool fastcall_p
7040 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7041 bool thiscall_p
7042 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7043 bool static_chain_p = DECL_STATIC_CHAIN (decl);
7044 int regparm = ix86_function_regparm (fntype, decl);
7045 int drap_regno
7046 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
7048 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
7049 for the static chain register. */
7050 if ((regparm < 1 || (fastcall_p && !static_chain_p))
7051 && drap_regno != AX_REG)
7052 regno = AX_REG;
7053 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
7054 for the static chain register. */
7055 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
7056 regno = AX_REG;
7057 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
7058 regno = DX_REG;
7059 /* ecx is the static chain register. */
7060 else if (regparm < 3 && !fastcall_p && !thiscall_p
7061 && !static_chain_p
7062 && drap_regno != CX_REG)
7063 regno = CX_REG;
7064 else if (ix86_save_reg (BX_REG, true, false))
7065 regno = BX_REG;
7066 /* esi is the static chain register. */
7067 else if (!(regparm == 3 && static_chain_p)
7068 && ix86_save_reg (SI_REG, true, false))
7069 regno = SI_REG;
7070 else if (ix86_save_reg (DI_REG, true, false))
7071 regno = DI_REG;
7072 else
7074 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
7075 sr->saved = true;
7079 sr->reg = gen_rtx_REG (Pmode, regno);
7080 if (sr->saved)
7082 rtx_insn *insn = emit_insn (gen_push (sr->reg));
7083 RTX_FRAME_RELATED_P (insn) = 1;
7087 /* Release a scratch register obtained from the preceding function.
7089 If RELEASE_VIA_POP is true, we just pop the register off the stack
7090 to release it. This is what non-Linux systems use with -fstack-check.
7092 Otherwise we use OFFSET to locate the saved register and the
7093 allocated stack space becomes part of the local frame and is
7094 deallocated by the epilogue. */
7096 static void
7097 release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
7098 bool release_via_pop)
7100 if (sr->saved)
7102 if (release_via_pop)
7104 struct machine_function *m = cfun->machine;
7105 rtx x, insn = emit_insn (gen_pop (sr->reg));
7107 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
7108 RTX_FRAME_RELATED_P (insn) = 1;
7109 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
7110 x = gen_rtx_SET (stack_pointer_rtx, x);
7111 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
7112 m->fs.sp_offset -= UNITS_PER_WORD;
7114 else
7116 rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
7117 x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
7118 emit_insn (x);
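/* Typical pairing of the two helpers above (a sketch of how the probing
   code later in this file uses them, not a new interface):

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... emit probing insns that clobber sr.reg ...
     release_scratch_register_on_entry (&sr, size, release_via_pop);  */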
7123 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
7125 If INT_REGISTERS_SAVED is true, then integer registers have already been
7126 pushed on the stack.
7128 If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope
7129 beyond SIZE bytes.
7131 This assumes no knowledge of the current probing state, i.e. it is never
7132 allowed to allocate more than PROBE_INTERVAL bytes of stack space without
7133 a suitable probe. */
7135 static void
7136 ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
7137 const bool int_registers_saved,
7138 const bool protection_area)
7140 struct machine_function *m = cfun->machine;
7142 /* If this function does not statically allocate stack space, then
7143 no probes are needed. */
7144 if (!size)
7146 /* However, the allocation of space via pushes for register
7147 saves could be viewed as allocating space, but without the
7148 need to probe. */
7149 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
7150 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7151 else
7152 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
7153 return;
7156 /* If we are a noreturn function, then we have to consider the
7157 possibility that we're called via a jump rather than a call.
7159 Thus we don't have the implicit probe generated by saving the
7160 return address into the stack at the call. Thus, the stack
7161 pointer could be anywhere in the guard page. The safe thing
7162 to do is emit a probe now.
7164 The probe can be avoided if we have already emitted any callee
7165 register saves into the stack or have a frame pointer (which will
7166 have been saved as well). Those saves will function as implicit
7167 probes.
7169 ?!? This should be revamped to work like aarch64 and s390 where
7170 we track the offset from the most recent probe. Normally that
7171 offset would be zero. For a noreturn function we would reset
7172 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
7173 we just probe when we cross PROBE_INTERVAL. */
7174 if (TREE_THIS_VOLATILE (cfun->decl)
7175 && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
7177 /* We can safely use any register here since we're just going to push
7178 its value and immediately pop it back. But we do try and avoid
7179 argument passing registers so as not to introduce dependencies in
7180 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
7181 rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
7182 rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
7183 rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
7184 m->fs.sp_offset -= UNITS_PER_WORD;
7185 if (m->fs.cfa_reg == stack_pointer_rtx)
7187 m->fs.cfa_offset -= UNITS_PER_WORD;
7188 rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
7189 x = gen_rtx_SET (stack_pointer_rtx, x);
7190 add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
7191 RTX_FRAME_RELATED_P (insn_push) = 1;
7192 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
7193 x = gen_rtx_SET (stack_pointer_rtx, x);
7194 add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
7195 RTX_FRAME_RELATED_P (insn_pop) = 1;
7197 emit_insn (gen_blockage ());
7200 const HOST_WIDE_INT probe_interval = get_probe_interval ();
7201 const int dope = 4 * UNITS_PER_WORD;
7203 /* If there is protection area, take it into account in the size. */
7204 if (protection_area)
7205 size += probe_interval + dope;
7207 /* If we allocate less than the size of the guard statically,
7208 then no probing is necessary, but we do need to allocate
7209 the stack. */
7210 else if (size < (1 << param_stack_clash_protection_guard_size))
7212 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7213 GEN_INT (-size), -1,
7214 m->fs.cfa_reg == stack_pointer_rtx);
7215 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7216 return;
7219 /* We're allocating a large enough stack frame that we need to
7220 emit probes. Either emit them inline or in a loop depending
7221 on the size. */
7222 if (size <= 4 * probe_interval)
7224 HOST_WIDE_INT i;
7225 for (i = probe_interval; i <= size; i += probe_interval)
7227 /* Allocate PROBE_INTERVAL bytes. */
7228 rtx insn
7229 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7230 GEN_INT (-probe_interval), -1,
7231 m->fs.cfa_reg == stack_pointer_rtx);
7232 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
7234 /* And probe at *sp. */
7235 emit_stack_probe (stack_pointer_rtx);
7236 emit_insn (gen_blockage ());
7239 /* We need to allocate space for the residual, but we do not need
7240 to probe the residual... */
7241 HOST_WIDE_INT residual = (i - probe_interval - size);
7242 if (residual)
7244 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7245 GEN_INT (residual), -1,
7246 m->fs.cfa_reg == stack_pointer_rtx);
7248 /* ...except if there is a protection area to maintain. */
7249 if (protection_area)
7250 emit_stack_probe (stack_pointer_rtx);
7253 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
7255 else
7257 /* We expect the GP registers to be saved when probes are used
7258 as the probing sequences might need a scratch register and
7259 the routine to allocate one assumes the integer registers
7260 have already been saved. */
7261 gcc_assert (int_registers_saved);
7263 struct scratch_reg sr;
7264 get_scratch_register_on_entry (&sr);
7266 /* If we needed to save a register, then account for any space
7267 that was pushed (we are not going to pop the register when
7268 we do the restore). */
7269 if (sr.saved)
7270 size -= UNITS_PER_WORD;
7272 /* Step 1: round SIZE down to a multiple of the interval. */
7273 HOST_WIDE_INT rounded_size = size & -probe_interval;
7275 /* Step 2: compute final value of the loop counter. Use lea if
7276 possible. */
7277 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
7278 rtx insn;
7279 if (address_no_seg_operand (addr, Pmode))
7280 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
7281 else
7283 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
7284 insn = emit_insn (gen_rtx_SET (sr.reg,
7285 gen_rtx_PLUS (Pmode, sr.reg,
7286 stack_pointer_rtx)));
7288 if (m->fs.cfa_reg == stack_pointer_rtx)
7290 add_reg_note (insn, REG_CFA_DEF_CFA,
7291 plus_constant (Pmode, sr.reg,
7292 m->fs.cfa_offset + rounded_size));
7293 RTX_FRAME_RELATED_P (insn) = 1;
7296 /* Step 3: the loop. */
7297 rtx size_rtx = GEN_INT (rounded_size);
7298 insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
7299 size_rtx));
7300 if (m->fs.cfa_reg == stack_pointer_rtx)
7302 m->fs.cfa_offset += rounded_size;
7303 add_reg_note (insn, REG_CFA_DEF_CFA,
7304 plus_constant (Pmode, stack_pointer_rtx,
7305 m->fs.cfa_offset));
7306 RTX_FRAME_RELATED_P (insn) = 1;
7308 m->fs.sp_offset += rounded_size;
7309 emit_insn (gen_blockage ());
7311 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
7312 is equal to ROUNDED_SIZE. */
7314 if (size != rounded_size)
7316 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7317 GEN_INT (rounded_size - size), -1,
7318 m->fs.cfa_reg == stack_pointer_rtx);
7320 if (protection_area)
7321 emit_stack_probe (stack_pointer_rtx);
7324 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
7326 /* This does not deallocate the space reserved for the scratch
7327 register. That will be deallocated in the epilogue. */
7328 release_scratch_register_on_entry (&sr, size, false);
7331 /* Adjust back to account for the protection area. */
7332 if (protection_area)
7333 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7334 GEN_INT (probe_interval + dope), -1,
7335 m->fs.cfa_reg == stack_pointer_rtx);
7337 /* Make sure nothing is scheduled before we are done. */
7338 emit_insn (gen_blockage ());
7341 /* Adjust the stack pointer up to REG while probing it. */
7343 const char *
7344 output_adjust_stack_and_probe (rtx reg)
7346 static int labelno = 0;
7347 char loop_lab[32];
7348 rtx xops[2];
7350 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7352 /* Loop. */
7353 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
7355 /* SP = SP + PROBE_INTERVAL. */
7356 xops[0] = stack_pointer_rtx;
7357 xops[1] = GEN_INT (get_probe_interval ());
7358 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
7360 /* Probe at SP. */
7361 xops[1] = const0_rtx;
7362 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
7364 /* Test if SP == LAST_ADDR. */
7365 xops[0] = stack_pointer_rtx;
7366 xops[1] = reg;
7367 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
7369 /* Branch. */
7370 fputs ("\tjne\t", asm_out_file);
7371 assemble_name_raw (asm_out_file, loop_lab);
7372 fputc ('\n', asm_out_file);
7374 return "";
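/* For reference, with the default 4 KiB probe interval the 64-bit AT&T
   output of the loop above looks roughly like this (register and label
   names are illustrative only):

	.LPSRL0:
		subq	$4096, %rsp
		orq	$0, (%rsp)
		cmpq	%r11, %rsp
		jne	.LPSRL0  */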
7377 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
7378 inclusive. These are offsets from the current stack pointer.
7380 INT_REGISTERS_SAVED is true if integer registers have already been
7381 pushed on the stack. */
7383 static void
7384 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
7385 const bool int_registers_saved)
7387 const HOST_WIDE_INT probe_interval = get_probe_interval ();
7389 /* See if we have a constant small number of probes to generate. If so,
7390 that's the easy case. The run-time loop is made up of 6 insns in the
7391 generic case while the compile-time loop is made up of n insns for n #
7392 of intervals. */
7393 if (size <= 6 * probe_interval)
7395 HOST_WIDE_INT i;
7397 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
7398 it exceeds SIZE. If only one probe is needed, this will not
7399 generate any code. Then probe at FIRST + SIZE. */
7400 for (i = probe_interval; i < size; i += probe_interval)
7401 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7402 -(first + i)));
7404 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7405 -(first + size)));
7408 /* Otherwise, do the same as above, but in a loop. Note that we must be
7409 extra careful with variables wrapping around because we might be at
7410 the very top (or the very bottom) of the address space and we have
7411 to be able to handle this case properly; in particular, we use an
7412 equality test for the loop condition. */
7413 else
7415 /* We expect the GP registers to be saved when probes are used
7416 as the probing sequences might need a scratch register and
7417 the routine to allocate one assumes the integer registers
7418 have already been saved. */
7419 gcc_assert (int_registers_saved);
7421 HOST_WIDE_INT rounded_size, last;
7422 struct scratch_reg sr;
7424 get_scratch_register_on_entry (&sr);
7427 /* Step 1: round SIZE to the previous multiple of the interval. */
7429 rounded_size = ROUND_DOWN (size, probe_interval);
7432 /* Step 2: compute initial and final value of the loop counter. */
7434 /* TEST_OFFSET = FIRST. */
7435 emit_move_insn (sr.reg, GEN_INT (-first));
7437 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
7438 last = first + rounded_size;
7441 /* Step 3: the loop
7445 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
7446 probe at TEST_ADDR
7448 while (TEST_ADDR != LAST_ADDR)
7450 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
7451 until it is equal to ROUNDED_SIZE. */
7453 emit_insn
7454 (gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));
7457 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
7458 that SIZE is equal to ROUNDED_SIZE. */
7460 if (size != rounded_size)
7461 emit_stack_probe (plus_constant (Pmode,
7462 gen_rtx_PLUS (Pmode,
7463 stack_pointer_rtx,
7464 sr.reg),
7465 rounded_size - size));
7467 release_scratch_register_on_entry (&sr, size, true);
7470 /* Make sure nothing is scheduled before we are done. */
7471 emit_insn (gen_blockage ());
7474 /* Probe a range of stack addresses from REG to END, inclusive. These are
7475 offsets from the current stack pointer. */
7477 const char *
7478 output_probe_stack_range (rtx reg, rtx end)
7480 static int labelno = 0;
7481 char loop_lab[32];
7482 rtx xops[3];
7484 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7486 /* Loop. */
7487 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
7489 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
7490 xops[0] = reg;
7491 xops[1] = GEN_INT (get_probe_interval ());
7492 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
7494 /* Probe at TEST_ADDR. */
7495 xops[0] = stack_pointer_rtx;
7496 xops[1] = reg;
7497 xops[2] = const0_rtx;
7498 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
7500 /* Test if TEST_ADDR == LAST_ADDR. */
7501 xops[0] = reg;
7502 xops[1] = end;
7503 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
7505 /* Branch. */
7506 fputs ("\tjne\t", asm_out_file);
7507 assemble_name_raw (asm_out_file, loop_lab);
7508 fputc ('\n', asm_out_file);
7510 return "";
7513 /* Set stack_frame_required to false if stack frame isn't required.
7514 Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
7515 slot used if stack frame is required and CHECK_STACK_SLOT is true. */
7517 static void
7518 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
7519 bool check_stack_slot)
7521 HARD_REG_SET set_up_by_prologue, prologue_used;
7522 basic_block bb;
7524 CLEAR_HARD_REG_SET (prologue_used);
7525 CLEAR_HARD_REG_SET (set_up_by_prologue);
7526 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
7527 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
7528 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
7529 HARD_FRAME_POINTER_REGNUM);
7531 /* The preferred stack alignment is the minimum stack alignment. */
7532 if (stack_alignment > crtl->preferred_stack_boundary)
7533 stack_alignment = crtl->preferred_stack_boundary;
7535 bool require_stack_frame = false;
7537 FOR_EACH_BB_FN (bb, cfun)
7539 rtx_insn *insn;
7540 FOR_BB_INSNS (bb, insn)
7541 if (NONDEBUG_INSN_P (insn)
7542 && requires_stack_frame_p (insn, prologue_used,
7543 set_up_by_prologue))
7545 require_stack_frame = true;
7547 if (check_stack_slot)
7549 /* Find the maximum stack alignment. */
7550 subrtx_iterator::array_type array;
7551 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
7552 if (MEM_P (*iter)
7553 && (reg_mentioned_p (stack_pointer_rtx,
7554 *iter)
7555 || reg_mentioned_p (frame_pointer_rtx,
7556 *iter)))
7558 unsigned int alignment = MEM_ALIGN (*iter);
7559 if (alignment > stack_alignment)
7560 stack_alignment = alignment;
7566 cfun->machine->stack_frame_required = require_stack_frame;
7569 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
7570 will guide prologue/epilogue to be generated in correct form. */
7572 static void
7573 ix86_finalize_stack_frame_flags (void)
7575 /* Check if stack realign is really needed after reload, and
7576 store the result in cfun. */
7577 unsigned int incoming_stack_boundary
7578 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
7579 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
7580 unsigned int stack_alignment
7581 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
7582 ? crtl->max_used_stack_slot_alignment
7583 : crtl->stack_alignment_needed);
7584 unsigned int stack_realign
7585 = (incoming_stack_boundary < stack_alignment);
7586 bool recompute_frame_layout_p = false;
7588 if (crtl->stack_realign_finalized)
7590 /* After stack_realign_needed is finalized, we can no longer
7591 change it. */
7592 gcc_assert (crtl->stack_realign_needed == stack_realign);
7593 return;
7596 /* It is always safe to compute max_used_stack_alignment. We
7597 compute it only if 128-bit aligned load/store may be generated
7598 on a misaligned stack slot, which would lead to a segfault. */
7599 bool check_stack_slot
7600 = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
7601 ix86_find_max_used_stack_alignment (stack_alignment,
7602 check_stack_slot);
7604 /* If the only reason for frame_pointer_needed is that we conservatively
7605 assumed stack realignment might be needed or -fno-omit-frame-pointer
7606 is used, but in the end nothing that needed the stack alignment had
7607 been spilled and there is no stack access, clear frame_pointer_needed
7608 and say we don't need stack realignment. */
7609 if ((stack_realign || (!flag_omit_frame_pointer && optimize))
7610 && frame_pointer_needed
7611 && crtl->is_leaf
7612 && crtl->sp_is_unchanging
7613 && !ix86_current_function_calls_tls_descriptor
7614 && !crtl->accesses_prior_frames
7615 && !cfun->calls_alloca
7616 && !crtl->calls_eh_return
7617 /* See ira_setup_eliminable_regset for the rationale. */
7618 && !(STACK_CHECK_MOVING_SP
7619 && flag_stack_check
7620 && flag_exceptions
7621 && cfun->can_throw_non_call_exceptions)
7622 && !ix86_frame_pointer_required ()
7623 && ix86_get_frame_size () == 0
7624 && ix86_nsaved_sseregs () == 0
7625 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
7627 if (cfun->machine->stack_frame_required)
7629 /* Stack frame is required. If stack alignment needed is less
7630 than incoming stack boundary, don't realign stack. */
7631 stack_realign = incoming_stack_boundary < stack_alignment;
7632 if (!stack_realign)
7634 crtl->max_used_stack_slot_alignment
7635 = incoming_stack_boundary;
7636 crtl->stack_alignment_needed
7637 = incoming_stack_boundary;
7638 /* Also update preferred_stack_boundary for leaf
7639 functions. */
7640 crtl->preferred_stack_boundary
7641 = incoming_stack_boundary;
7644 else
7646 /* If drap has been set, but it actually isn't live at the
7647 start of the function, there is no reason to set it up. */
7648 if (crtl->drap_reg)
7650 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
7651 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
7652 REGNO (crtl->drap_reg)))
7654 crtl->drap_reg = NULL_RTX;
7655 crtl->need_drap = false;
7658 else
7659 cfun->machine->no_drap_save_restore = true;
7661 frame_pointer_needed = false;
7662 stack_realign = false;
7663 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
7664 crtl->stack_alignment_needed = incoming_stack_boundary;
7665 crtl->stack_alignment_estimated = incoming_stack_boundary;
7666 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
7667 crtl->preferred_stack_boundary = incoming_stack_boundary;
7668 df_finish_pass (true);
7669 df_scan_alloc (NULL);
7670 df_scan_blocks ();
7671 df_compute_regs_ever_live (true);
7672 df_analyze ();
7674 if (flag_var_tracking)
7676 /* Since frame pointer is no longer available, replace it with
7677 stack pointer - UNITS_PER_WORD in debug insns. */
7678 df_ref ref, next;
7679 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
7680 ref; ref = next)
7682 next = DF_REF_NEXT_REG (ref);
7683 if (!DF_REF_INSN_INFO (ref))
7684 continue;
7686 /* Make sure the next ref is for a different instruction,
7687 so that we're not affected by the rescan. */
7688 rtx_insn *insn = DF_REF_INSN (ref);
7689 while (next && DF_REF_INSN (next) == insn)
7690 next = DF_REF_NEXT_REG (next);
7692 if (DEBUG_INSN_P (insn))
7694 bool changed = false;
7695 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
7697 rtx *loc = DF_REF_LOC (ref);
7698 if (*loc == hard_frame_pointer_rtx)
7700 *loc = plus_constant (Pmode,
7701 stack_pointer_rtx,
7702 -UNITS_PER_WORD);
7703 changed = true;
7706 if (changed)
7707 df_insn_rescan (insn);
7712 recompute_frame_layout_p = true;
7715 else if (crtl->max_used_stack_slot_alignment >= 128
7716 && cfun->machine->stack_frame_required)
7718 /* We don't need to realign stack. max_used_stack_alignment is
7719 used to decide how the stack frame should be aligned. This is
7720 independent of any psABI and of 32-bit vs 64-bit. */
7721 cfun->machine->max_used_stack_alignment
7722 = stack_alignment / BITS_PER_UNIT;
7725 if (crtl->stack_realign_needed != stack_realign)
7726 recompute_frame_layout_p = true;
7727 crtl->stack_realign_needed = stack_realign;
7728 crtl->stack_realign_finalized = true;
7729 if (recompute_frame_layout_p)
7730 ix86_compute_frame_layout ();
7733 /* Delete SET_GOT right after entry block if it is allocated to reg. */
7735 static void
7736 ix86_elim_entry_set_got (rtx reg)
7738 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
7739 rtx_insn *c_insn = BB_HEAD (bb);
7740 if (!NONDEBUG_INSN_P (c_insn))
7741 c_insn = next_nonnote_nondebug_insn (c_insn);
7742 if (c_insn && NONJUMP_INSN_P (c_insn))
7744 rtx pat = PATTERN (c_insn);
7745 if (GET_CODE (pat) == PARALLEL)
7747 rtx vec = XVECEXP (pat, 0, 0);
7748 if (GET_CODE (vec) == SET
7749 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
7750 && REGNO (XEXP (vec, 0)) == REGNO (reg))
7751 delete_insn (c_insn);
7756 static rtx
7757 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
7759 rtx addr, mem;
7761 if (offset)
7762 addr = plus_constant (Pmode, frame_reg, offset);
7763 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
7764 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
7767 static inline rtx
7768 gen_frame_load (rtx reg, rtx frame_reg, int offset)
7770 return gen_frame_set (reg, frame_reg, offset, false);
7773 static inline rtx
7774 gen_frame_store (rtx reg, rtx frame_reg, int offset)
7776 return gen_frame_set (reg, frame_reg, offset, true);
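/* For example (illustrative only), for a DImode register REG
   gen_frame_store (reg, rax, -16) builds roughly
     (set (mem:DI (plus:DI (reg:DI ax) (const_int -16))) (reg:DI ...))
   which ix86_emit_outlined_ms2sysv_save below collects into a PARALLEL.  */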
7779 static void
7780 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
7782 struct machine_function *m = cfun->machine;
7783 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
7784 + m->call_ms2sysv_extra_regs;
7785 rtvec v = rtvec_alloc (ncregs + 1);
7786 unsigned int align, i, vi = 0;
7787 rtx_insn *insn;
7788 rtx sym, addr;
7789 rtx rax = gen_rtx_REG (word_mode, AX_REG);
7790 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
7792 /* AL should only be live with sysv_abi. */
7793 gcc_assert (!ix86_eax_live_at_start_p ());
7794 gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
7796 /* Set up RAX as the stub's base pointer.  We use stack_realign_offset
7797 regardless of whether we've actually realigned the stack or not. */
7798 align = GET_MODE_ALIGNMENT (V4SFmode);
7799 addr = choose_baseaddr (frame.stack_realign_offset
7800 + xlogue.get_stub_ptr_offset (), &align, AX_REG);
7801 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
7803 emit_insn (gen_rtx_SET (rax, addr));
7805 /* Get the stub symbol. */
7806 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
7807 : XLOGUE_STUB_SAVE);
7808 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
7810 for (i = 0; i < ncregs; ++i)
7812 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
7813 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
7814 r.regno);
7815 RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
7818 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
7820 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
7821 RTX_FRAME_RELATED_P (insn) = true;
7824 /* Generate and return an insn body to AND X with Y. */
7826 static rtx_insn *
7827 gen_and2_insn (rtx x, rtx y)
7829 enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
7831 gcc_assert (insn_operand_matches (icode, 0, x));
7832 gcc_assert (insn_operand_matches (icode, 1, x));
7833 gcc_assert (insn_operand_matches (icode, 2, y));
7835 return GEN_FCN (icode) (x, x, y);
7838 /* Expand the prologue into a bunch of separate insns. */
7840 void
7841 ix86_expand_prologue (void)
7843 struct machine_function *m = cfun->machine;
7844 rtx insn, t;
7845 HOST_WIDE_INT allocate;
7846 bool int_registers_saved;
7847 bool sse_registers_saved;
7848 bool save_stub_call_needed;
7849 rtx static_chain = NULL_RTX;
7851 if (ix86_function_naked (current_function_decl))
7852 return;
7854 ix86_finalize_stack_frame_flags ();
7856 /* DRAP should not coexist with stack_realign_fp */
7857 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
7859 memset (&m->fs, 0, sizeof (m->fs));
7861 /* Initialize CFA state for before the prologue. */
7862 m->fs.cfa_reg = stack_pointer_rtx;
7863 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
7865 /* Track SP offset to the CFA. We continue tracking this after we've
7866 swapped the CFA register away from SP. In the case of re-alignment
7867 this is fudged; we're only interested in offsets within the local frame. */
7868 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
7869 m->fs.sp_valid = true;
7870 m->fs.sp_realigned = false;
7872 const struct ix86_frame &frame = cfun->machine->frame;
7874 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
7876 /* We should have already generated an error for any use of
7877 ms_hook on a nested function. */
7878 gcc_checking_assert (!ix86_static_chain_on_stack);
7880 /* Check if profiling is active and we shall use the profiling-before-
7881 prologue variant.  If so, issue a sorry. */
7882 if (crtl->profile && flag_fentry != 0)
7883 sorry ("%<ms_hook_prologue%> attribute is not compatible "
7884 "with %<-mfentry%> for 32-bit");
7886 /* In ix86_asm_output_function_label we emitted:
7887 8b ff movl.s %edi,%edi
7888 55 push %ebp
7889 8b ec movl.s %esp,%ebp
7891 This matches the hookable function prologue in Win32 API
7892 functions in Microsoft Windows XP Service Pack 2 and newer.
7893 Wine uses this to enable Windows apps to hook the Win32 API
7894 functions provided by Wine.
7896 What that means is that we've already set up the frame pointer. */
7898 if (frame_pointer_needed
7899 && !(crtl->drap_reg && crtl->stack_realign_needed))
7901 rtx push, mov;
7903 /* We've decided to use the frame pointer already set up.
7904 Describe this to the unwinder by pretending that both
7905 push and mov insns happen right here.
7907 Putting the unwind info here at the end of the ms_hook
7908 is done so that we can make absolutely certain we get
7909 the required byte sequence at the start of the function,
7910 rather than relying on an assembler that can produce
7911 the exact encoding required.
7913 However it does mean (in the unpatched case) that we have
7914 a 1 insn window where the asynchronous unwind info is
7915 incorrect. However, if we placed the unwind info at
7916 its correct location we would have incorrect unwind info
7917 in the patched case. Which is probably all moot since
7918 I don't expect Wine generates dwarf2 unwind info for the
7919 system libraries that use this feature. */
7921 insn = emit_insn (gen_blockage ());
7923 push = gen_push (hard_frame_pointer_rtx);
7924 mov = gen_rtx_SET (hard_frame_pointer_rtx,
7925 stack_pointer_rtx);
7926 RTX_FRAME_RELATED_P (push) = 1;
7927 RTX_FRAME_RELATED_P (mov) = 1;
7929 RTX_FRAME_RELATED_P (insn) = 1;
7930 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7931 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
7933 /* Note that gen_push incremented m->fs.cfa_offset, even
7934 though we didn't emit the push insn here. */
7935 m->fs.cfa_reg = hard_frame_pointer_rtx;
7936 m->fs.fp_offset = m->fs.cfa_offset;
7937 m->fs.fp_valid = true;
7939 else
7941 /* The frame pointer is not needed so pop %ebp again.
7942 This leaves us with a pristine state. */
7943 emit_insn (gen_pop (hard_frame_pointer_rtx));
7947 /* The first insn of a function that accepts its static chain on the
7948 stack is to push the register that would be filled in by a direct
7949 call. This insn will be skipped by the trampoline. */
7950 else if (ix86_static_chain_on_stack)
7952 static_chain = ix86_static_chain (cfun->decl, false);
7953 insn = emit_insn (gen_push (static_chain));
7954 emit_insn (gen_blockage ());
7956 /* We don't want to interpret this push insn as a register save,
7957 only as a stack adjustment. The real copy of the register as
7958 a save will be done later, if needed. */
7959 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
7960 t = gen_rtx_SET (stack_pointer_rtx, t);
7961 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
7962 RTX_FRAME_RELATED_P (insn) = 1;
7965 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
7966 DRAP is needed and stack realignment is really needed after reload. */
7967 if (stack_realign_drap)
7969 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
7971 /* Can't use DRAP in interrupt function. */
7972 if (cfun->machine->func_type != TYPE_NORMAL)
7973 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
7974 "in interrupt service routine. This may be worked "
7975 "around by avoiding functions with aggregate return.");
7977 /* Only need to push parameter pointer reg if it is caller saved. */
7978 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
7980 /* Push arg pointer reg */
7981 insn = emit_insn (gen_push (crtl->drap_reg));
7982 RTX_FRAME_RELATED_P (insn) = 1;
7985 /* Grab the argument pointer. */
7986 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
7987 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
7988 RTX_FRAME_RELATED_P (insn) = 1;
7989 m->fs.cfa_reg = crtl->drap_reg;
7990 m->fs.cfa_offset = 0;
7992 /* Align the stack. */
7993 insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
7994 GEN_INT (-align_bytes)));
7995 RTX_FRAME_RELATED_P (insn) = 1;
7997 /* Replicate the return address on the stack so that return
7998 address can be reached via (argp - 1) slot. This is needed
7999 to implement macro RETURN_ADDR_RTX and intrinsic function
8000 expand_builtin_return_addr etc. */
8001 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
8002 t = gen_frame_mem (word_mode, t);
8003 insn = emit_insn (gen_push (t));
8004 RTX_FRAME_RELATED_P (insn) = 1;
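/* For the common case where the DRAP register is %ecx and the required
   alignment is 16 bytes, the insns emitted above correspond roughly to
   (illustrative only):
	leal	4(%esp), %ecx
	andl	$-16, %esp
	pushl	-4(%ecx)
   i.e. grab the incoming argument pointer, align the stack, and then
   replicate the return address below the realigned frame.  */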
8006 /* For the purposes of frame and register save area addressing,
8007 we've started over with a new frame. */
8008 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8009 m->fs.realigned = true;
8011 if (static_chain)
8013 /* Replicate static chain on the stack so that static chain
8014 can be reached via (argp - 2) slot. This is needed for
8015 a nested function with stack realignment. */
8016 insn = emit_insn (gen_push (static_chain));
8017 RTX_FRAME_RELATED_P (insn) = 1;
8021 int_registers_saved = (frame.nregs == 0);
8022 sse_registers_saved = (frame.nsseregs == 0);
8023 save_stub_call_needed = (m->call_ms2sysv);
8024 gcc_assert (sse_registers_saved || !save_stub_call_needed);
8026 if (frame_pointer_needed && !m->fs.fp_valid)
8028 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8029 slower on all targets. Also sdb didn't like it. */
8030 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8031 RTX_FRAME_RELATED_P (insn) = 1;
8033 /* Push registers now, before setting the frame pointer
8034 on SEH target. */
8035 if (!int_registers_saved
8036 && TARGET_SEH
8037 && !frame.save_regs_using_mov)
8039 ix86_emit_save_regs ();
8040 int_registers_saved = true;
8041 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
8044 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
8046 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8047 RTX_FRAME_RELATED_P (insn) = 1;
8049 if (m->fs.cfa_reg == stack_pointer_rtx)
8050 m->fs.cfa_reg = hard_frame_pointer_rtx;
8051 m->fs.fp_offset = m->fs.sp_offset;
8052 m->fs.fp_valid = true;
8056 if (!int_registers_saved)
8058 /* If saving registers via PUSH, do so now. */
8059 if (!frame.save_regs_using_mov)
8061 ix86_emit_save_regs ();
8062 int_registers_saved = true;
8063 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
8066 /* When using red zone we may start register saving before allocating
8067 the stack frame saving one cycle of the prologue. However, avoid
8068 doing this if we have to probe the stack; at least on x86_64 the
8069 stack probe can turn into a call that clobbers a red zone location. */
8070 else if (ix86_using_red_zone ()
8071 && (! TARGET_STACK_PROBE
8072 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
8074 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8075 int_registers_saved = true;
8079 if (stack_realign_fp)
8081 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8082 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8084 /* Record last valid frame pointer offset. */
8085 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
8087 /* The computation of the size of the re-aligned stack frame means
8088 that we must allocate the size of the register save area before
8089 performing the actual alignment. Otherwise we cannot guarantee
8090 that there's enough storage above the realignment point. */
8091 allocate = frame.reg_save_offset - m->fs.sp_offset
8092 + frame.stack_realign_allocate;
8093 if (allocate)
8094 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8095 GEN_INT (-allocate), -1, false);
8097 /* Align the stack. */
8098 emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
8099 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
8100 m->fs.sp_realigned_offset = m->fs.sp_offset
8101 - frame.stack_realign_allocate;
8102 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
8103 Beyond this point, stack access should be done via choose_baseaddr or
8104 by using sp_valid_at and fp_valid_at to determine the correct base
8105 register. Henceforth, any CFA offset should be thought of as logical
8106 and not physical. */
8107 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
8108 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
8109 m->fs.sp_realigned = true;
8111 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
8112 is needed to describe where a register is saved using a realigned
8113 stack pointer, so we need to invalidate the stack pointer for that
8114 target. */
8115 if (TARGET_SEH)
8116 m->fs.sp_valid = false;
8118 /* If SP offset is non-immediate after allocation of the stack frame,
8119 then emit SSE saves or stub call prior to allocating the rest of the
8120 stack frame. This is less efficient for the out-of-line stub because
8121 we can't combine allocations across the call barrier, but it's better
8122 than using a scratch register. */
8123 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
8124 - m->fs.sp_realigned_offset),
8125 Pmode))
8127 if (!sse_registers_saved)
8129 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8130 sse_registers_saved = true;
8132 else if (save_stub_call_needed)
8134 ix86_emit_outlined_ms2sysv_save (frame);
8135 save_stub_call_needed = false;
8140 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
8142 if (flag_stack_usage_info)
8144 /* We start to count from ARG_POINTER. */
8145 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
8147 /* If it was realigned, take into account the fake frame. */
8148 if (stack_realign_drap)
8150 if (ix86_static_chain_on_stack)
8151 stack_size += UNITS_PER_WORD;
8153 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8154 stack_size += UNITS_PER_WORD;
8156 /* This over-estimates by 1 minimal-stack-alignment-unit but
8157 mitigates that by counting in the new return address slot. */
8158 current_function_dynamic_stack_size
8159 += crtl->stack_alignment_needed / BITS_PER_UNIT;
8162 current_function_static_stack_size = stack_size;
8165 /* On SEH target with very large frame size, allocate an area to save
8166 SSE registers (as the very large allocation won't be described). */
8167 if (TARGET_SEH
8168 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
8169 && !sse_registers_saved)
8171 HOST_WIDE_INT sse_size
8172 = frame.sse_reg_save_offset - frame.reg_save_offset;
8174 gcc_assert (int_registers_saved);
8176 /* No need to do stack checking as the area will be immediately
8177 written. */
8178 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8179 GEN_INT (-sse_size), -1,
8180 m->fs.cfa_reg == stack_pointer_rtx);
8181 allocate -= sse_size;
8182 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8183 sse_registers_saved = true;
8186 /* If stack clash protection is requested, then probe the stack. */
8187 if (allocate >= 0 && flag_stack_clash_protection)
8189 ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
8190 allocate = 0;
8193 /* The stack has already been decremented by the instruction calling us
8194 so probe if the size is non-negative to preserve the protection area. */
8195 else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8197 const HOST_WIDE_INT probe_interval = get_probe_interval ();
8199 if (STACK_CHECK_MOVING_SP)
8201 if (crtl->is_leaf
8202 && !cfun->calls_alloca
8203 && allocate <= probe_interval)
8206 else
8208 ix86_adjust_stack_and_probe (allocate, int_registers_saved, true);
8209 allocate = 0;
8213 else
8215 HOST_WIDE_INT size = allocate;
8217 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
8218 size = 0x80000000 - get_stack_check_protect () - 1;
8220 if (TARGET_STACK_PROBE)
8222 if (crtl->is_leaf && !cfun->calls_alloca)
8224 if (size > probe_interval)
8225 ix86_emit_probe_stack_range (0, size, int_registers_saved);
8227 else
8228 ix86_emit_probe_stack_range (0,
8229 size + get_stack_check_protect (),
8230 int_registers_saved);
8232 else
8234 if (crtl->is_leaf && !cfun->calls_alloca)
8236 if (size > probe_interval
8237 && size > get_stack_check_protect ())
8238 ix86_emit_probe_stack_range (get_stack_check_protect (),
8239 (size
8240 - get_stack_check_protect ()),
8241 int_registers_saved);
8243 else
8244 ix86_emit_probe_stack_range (get_stack_check_protect (), size,
8245 int_registers_saved);
8250 if (allocate == 0)
8252 else if (!ix86_target_stack_probe ()
8253 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
8255 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8256 GEN_INT (-allocate), -1,
8257 m->fs.cfa_reg == stack_pointer_rtx);
8259 else
8261 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8262 rtx r10 = NULL;
8263 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
8264 bool eax_live = ix86_eax_live_at_start_p ();
8265 bool r10_live = false;
8267 if (TARGET_64BIT)
8268 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
8270 if (eax_live)
8272 insn = emit_insn (gen_push (eax));
8273 allocate -= UNITS_PER_WORD;
8274 /* Note that SEH directives need to continue tracking the stack
8275 pointer even after the frame pointer has been set up. */
8276 if (sp_is_cfa_reg || TARGET_SEH)
8278 if (sp_is_cfa_reg)
8279 m->fs.cfa_offset += UNITS_PER_WORD;
8280 RTX_FRAME_RELATED_P (insn) = 1;
8281 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8282 gen_rtx_SET (stack_pointer_rtx,
8283 plus_constant (Pmode,
8284 stack_pointer_rtx,
8285 -UNITS_PER_WORD)));
8289 if (r10_live)
8291 r10 = gen_rtx_REG (Pmode, R10_REG);
8292 insn = emit_insn (gen_push (r10));
8293 allocate -= UNITS_PER_WORD;
8294 if (sp_is_cfa_reg || TARGET_SEH)
8296 if (sp_is_cfa_reg)
8297 m->fs.cfa_offset += UNITS_PER_WORD;
8298 RTX_FRAME_RELATED_P (insn) = 1;
8299 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8300 gen_rtx_SET (stack_pointer_rtx,
8301 plus_constant (Pmode,
8302 stack_pointer_rtx,
8303 -UNITS_PER_WORD)));
8307 emit_move_insn (eax, GEN_INT (allocate));
8308 emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
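/* The worker probe touches each page of the newly allocated area so
   guard pages are hit in order (on Windows targets this expands to a
   chkstk-style helper call).  */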
8310 /* Use the fact that AX still contains ALLOCATE. */
8311 insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
8312 (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
8314 if (sp_is_cfa_reg || TARGET_SEH)
8316 if (sp_is_cfa_reg)
8317 m->fs.cfa_offset += allocate;
8318 RTX_FRAME_RELATED_P (insn) = 1;
8319 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8320 gen_rtx_SET (stack_pointer_rtx,
8321 plus_constant (Pmode, stack_pointer_rtx,
8322 -allocate)));
8324 m->fs.sp_offset += allocate;
8326 /* Use stack_pointer_rtx for relative addressing so that code works for
8327 realigned stack. But this means that we need a blockage to prevent
8328 stores based on the frame pointer from being scheduled before. */
8329 if (r10_live && eax_live)
8331 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8332 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
8333 gen_frame_mem (word_mode, t));
8334 t = plus_constant (Pmode, t, UNITS_PER_WORD);
8335 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
8336 gen_frame_mem (word_mode, t));
8337 emit_insn (gen_memory_blockage ());
8339 else if (eax_live || r10_live)
8341 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8342 emit_move_insn (gen_rtx_REG (word_mode,
8343 (eax_live ? AX_REG : R10_REG)),
8344 gen_frame_mem (word_mode, t));
8345 emit_insn (gen_memory_blockage ());
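/* Frame allocation is complete; the tracked SP offset should now match
   frame.stack_pointer_offset, which the assertion below verifies.  */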
8348 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
8350 /* If we haven't already set up the frame pointer, do so now. */
8351 if (frame_pointer_needed && !m->fs.fp_valid)
8353 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
8354 GEN_INT (frame.stack_pointer_offset
8355 - frame.hard_frame_pointer_offset));
8356 insn = emit_insn (insn);
8357 RTX_FRAME_RELATED_P (insn) = 1;
8358 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
8360 if (m->fs.cfa_reg == stack_pointer_rtx)
8361 m->fs.cfa_reg = hard_frame_pointer_rtx;
8362 m->fs.fp_offset = frame.hard_frame_pointer_offset;
8363 m->fs.fp_valid = true;
8366 if (!int_registers_saved)
8367 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8368 if (!sse_registers_saved)
8369 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8370 else if (save_stub_call_needed)
8371 ix86_emit_outlined_ms2sysv_save (frame);
8373 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
8374 in the prologue. */
8375 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
8377 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
8378 insn = emit_insn (gen_set_got (pic));
8379 RTX_FRAME_RELATED_P (insn) = 1;
8380 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
8381 emit_insn (gen_prologue_use (pic));
8382 /* Delete an already emitted SET_GOT if it exists and is allocated to
8383 REAL_PIC_OFFSET_TABLE_REGNUM. */
8384 ix86_elim_entry_set_got (pic);
8387 if (crtl->drap_reg && !crtl->stack_realign_needed)
8389 /* vDRAP was set up, but after reload it turns out stack realignment
8390 isn't necessary; emit prologue code here to set up DRAP
8391 without the stack realignment adjustment. */
8392 t = choose_baseaddr (0, NULL);
8393 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8396 /* Prevent instructions from being scheduled into register save push
8397 sequence when access to the redzone area is done through frame pointer.
8398 The offset between the frame pointer and the stack pointer is calculated
8399 relative to the value of the stack pointer at the end of the function
8400 prologue, and moving instructions that access redzone area via frame
8401 pointer inside push sequence violates this assumption. */
8402 if (frame_pointer_needed && frame.red_zone_size)
8403 emit_insn (gen_memory_blockage ());
8405 /* SEH requires that the prologue end within 256 bytes of the start of
8406 the function. Prevent instruction schedules that would extend that.
8407 Further, prevent alloca modifications to the stack pointer from being
8408 combined with prologue modifications. */
8409 if (TARGET_SEH)
8410 emit_insn (gen_prologue_use (stack_pointer_rtx));
8413 /* Emit code to restore REG using a POP insn. */
8415 static void
8416 ix86_emit_restore_reg_using_pop (rtx reg)
8418 struct machine_function *m = cfun->machine;
8419 rtx_insn *insn = emit_insn (gen_pop (reg));
8421 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
8422 m->fs.sp_offset -= UNITS_PER_WORD;
8424 if (m->fs.cfa_reg == crtl->drap_reg
8425 && REGNO (reg) == REGNO (crtl->drap_reg))
8427 /* Previously we'd represented the CFA as an expression
8428 like *(%ebp - 8). We've just popped that value from
8429 the stack, which means we need to reset the CFA to
8430 the drap register. This will remain until we restore
8431 the stack pointer. */
8432 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8433 RTX_FRAME_RELATED_P (insn) = 1;
8435 /* This means that the DRAP register is valid for addressing too. */
8436 m->fs.drap_valid = true;
8437 return;
8440 if (m->fs.cfa_reg == stack_pointer_rtx)
8442 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8443 x = gen_rtx_SET (stack_pointer_rtx, x);
8444 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
8445 RTX_FRAME_RELATED_P (insn) = 1;
8447 m->fs.cfa_offset -= UNITS_PER_WORD;
8450 /* When the frame pointer is the CFA, and we pop it, we are
8451 swapping back to the stack pointer as the CFA. This happens
8452 for stack frames that don't allocate other data, so we assume
8453 the stack pointer is now pointing at the return address, i.e.
8454 the function entry state, which makes the offset be 1 word. */
8455 if (reg == hard_frame_pointer_rtx)
8457 m->fs.fp_valid = false;
8458 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
8460 m->fs.cfa_reg = stack_pointer_rtx;
8461 m->fs.cfa_offset -= UNITS_PER_WORD;
8463 add_reg_note (insn, REG_CFA_DEF_CFA,
8464 plus_constant (Pmode, stack_pointer_rtx,
8465 m->fs.cfa_offset));
8466 RTX_FRAME_RELATED_P (insn) = 1;
8471 /* Emit code to restore saved registers using POP insns. */
8473 static void
8474 ix86_emit_restore_regs_using_pop (void)
8476 unsigned int regno;
8478 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8479 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
8480 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
8483 /* Emit code and notes for the LEAVE instruction.  If INSN is non-null,
8484 omit the emit and only attach the notes. */
8486 static void
8487 ix86_emit_leave (rtx_insn *insn)
8489 struct machine_function *m = cfun->machine;
8491 if (!insn)
8492 insn = emit_insn (gen_leave (word_mode));
8494 ix86_add_queued_cfa_restore_notes (insn);
8496 gcc_assert (m->fs.fp_valid);
8497 m->fs.sp_valid = true;
8498 m->fs.sp_realigned = false;
8499 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
8500 m->fs.fp_valid = false;
8502 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
8504 m->fs.cfa_reg = stack_pointer_rtx;
8505 m->fs.cfa_offset = m->fs.sp_offset;
8507 add_reg_note (insn, REG_CFA_DEF_CFA,
8508 plus_constant (Pmode, stack_pointer_rtx,
8509 m->fs.sp_offset));
8510 RTX_FRAME_RELATED_P (insn) = 1;
8512 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
8513 m->fs.fp_offset);
8516 /* Emit code to restore saved registers using MOV insns.
8517 First register is restored from CFA - CFA_OFFSET. */
8518 static void
8519 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
8520 bool maybe_eh_return)
8522 struct machine_function *m = cfun->machine;
8523 unsigned int regno;
8525 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8526 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
8528 rtx reg = gen_rtx_REG (word_mode, regno);
8529 rtx mem;
8530 rtx_insn *insn;
8532 mem = choose_baseaddr (cfa_offset, NULL);
8533 mem = gen_frame_mem (word_mode, mem);
8534 insn = emit_move_insn (reg, mem);
8536 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8538 /* Previously we'd represented the CFA as an expression
8539 like *(%ebp - 8). We've just reloaded that value from
8540 the stack, which means we need to reset the CFA to
8541 the drap register. This will remain until we restore
8542 the stack pointer. */
8543 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8544 RTX_FRAME_RELATED_P (insn) = 1;
8546 /* This means that the DRAP register is valid for addressing. */
8547 m->fs.drap_valid = true;
8549 else
8550 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
8552 cfa_offset -= UNITS_PER_WORD;
8556 /* Emit code to restore saved SSE registers using MOV insns.
8557 First register is restored from CFA - CFA_OFFSET. */
8558 static void
8559 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
8560 bool maybe_eh_return)
8562 unsigned int regno;
8564 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8565 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
8567 rtx reg = gen_rtx_REG (V4SFmode, regno);
8568 rtx mem;
8569 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
8571 mem = choose_baseaddr (cfa_offset, &align);
8572 mem = gen_rtx_MEM (V4SFmode, mem);
8574 /* The location alignment depends upon the base register. */
8575 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
8576 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
8577 set_mem_align (mem, align);
8578 emit_insn (gen_rtx_SET (reg, mem));
8580 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
8582 cfa_offset -= GET_MODE_SIZE (V4SFmode);
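/* Emit the out-of-line ms2sysv restore sequence.  RSI is set up as the
   stub's base pointer; depending on USE_CALL the stub is either called
   or tail-jumped to, and the PARALLEL carries one frame load plus a CFA
   restore note per register.  STYLE is passed to the final stack
   adjustment.  */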
8586 static void
8587 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
8588 bool use_call, int style)
8590 struct machine_function *m = cfun->machine;
8591 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
8592 + m->call_ms2sysv_extra_regs;
8593 rtvec v;
8594 unsigned int elems_needed, align, i, vi = 0;
8595 rtx_insn *insn;
8596 rtx sym, tmp;
8597 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
8598 rtx r10 = NULL_RTX;
8599 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
8600 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
8601 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
8602 rtx rsi_frame_load = NULL_RTX;
8603 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
8604 enum xlogue_stub stub;
8606 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
8608 /* If using a realigned stack, we should never start with padding. */
8609 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
8611 /* Setup RSI as the stub's base pointer. */
8612 align = GET_MODE_ALIGNMENT (V4SFmode);
8613 tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
8614 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
8616 emit_insn (gen_rtx_SET (rsi, tmp));
8618 /* Get a symbol for the stub. */
8619 if (frame_pointer_needed)
8620 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
8621 : XLOGUE_STUB_RESTORE_HFP_TAIL;
8622 else
8623 stub = use_call ? XLOGUE_STUB_RESTORE
8624 : XLOGUE_STUB_RESTORE_TAIL;
8625 sym = xlogue.get_stub_rtx (stub);
8627 elems_needed = ncregs;
8628 if (use_call)
8629 elems_needed += 1;
8630 else
8631 elems_needed += frame_pointer_needed ? 5 : 3;
8632 v = rtvec_alloc (elems_needed);
8634 /* We call the epilogue stub when we need to pop incoming args or we are
8635 doing a sibling call as the tail. Otherwise, we will emit a jmp to the
8636 epilogue stub and it is the tail-call. */
8637 if (use_call)
8638 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8639 else
8641 RTVEC_ELT (v, vi++) = ret_rtx;
8642 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8643 if (frame_pointer_needed)
8645 rtx rbp = gen_rtx_REG (DImode, BP_REG);
8646 gcc_assert (m->fs.fp_valid);
8647 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
8649 tmp = plus_constant (DImode, rbp, 8);
8650 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
8651 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
8652 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
8653 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
8655 else
8657 /* If no hard frame pointer, we set R10 to the SP restore value. */
8658 gcc_assert (!m->fs.fp_valid);
8659 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
8660 gcc_assert (m->fs.sp_valid);
8662 r10 = gen_rtx_REG (DImode, R10_REG);
8663 tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
8664 emit_insn (gen_rtx_SET (r10, tmp));
8666 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
8670 /* Generate frame load insns and restore notes. */
8671 for (i = 0; i < ncregs; ++i)
8673 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
8674 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
8675 rtx reg, frame_load;
8677 reg = gen_rtx_REG (mode, r.regno);
8678 frame_load = gen_frame_load (reg, rsi, r.offset);
8680 /* Save RSI frame load insn & note to add last. */
8681 if (r.regno == SI_REG)
8683 gcc_assert (!rsi_frame_load);
8684 rsi_frame_load = frame_load;
8685 rsi_restore_offset = r.offset;
8687 else
8689 RTVEC_ELT (v, vi++) = frame_load;
8690 ix86_add_cfa_restore_note (NULL, reg, r.offset);
8694 /* Add RSI frame load & restore note at the end. */
8695 gcc_assert (rsi_frame_load);
8696 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
8697 RTVEC_ELT (v, vi++) = rsi_frame_load;
8698 ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
8699 rsi_restore_offset);
8701 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
8702 if (!use_call && !frame_pointer_needed)
8704 gcc_assert (m->fs.sp_valid);
8705 gcc_assert (!m->fs.sp_realigned);
8707 /* At this point, R10 should point to frame.stack_realign_offset. */
8708 if (m->fs.cfa_reg == stack_pointer_rtx)
8709 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
8710 m->fs.sp_offset = frame.stack_realign_offset;
8713 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
8714 tmp = gen_rtx_PARALLEL (VOIDmode, v);
8715 if (use_call)
8716 insn = emit_insn (tmp);
8717 else
8719 insn = emit_jump_insn (tmp);
8720 JUMP_LABEL (insn) = ret_rtx;
8722 if (frame_pointer_needed)
8723 ix86_emit_leave (insn);
8724 else
8726 /* Need CFA adjust note. */
8727 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
8728 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
8732 RTX_FRAME_RELATED_P (insn) = true;
8733 ix86_add_queued_cfa_restore_notes (insn);
8735 /* If we're not doing a tail-call, we need to adjust the stack. */
8736 if (use_call && m->fs.sp_valid)
8738 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
8739 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8740 GEN_INT (dealloc), style,
8741 m->fs.cfa_reg == stack_pointer_rtx);
8745 /* Restore function stack, frame, and registers. */
8747 void
8748 ix86_expand_epilogue (int style)
8750 struct machine_function *m = cfun->machine;
8751 struct machine_frame_state frame_state_save = m->fs;
8752 bool restore_regs_via_mov;
8753 bool using_drap;
8754 bool restore_stub_is_tail = false;
8756 if (ix86_function_naked (current_function_decl))
8758 /* The program should not reach this point. */
8759 emit_insn (gen_ud2 ());
8760 return;
8763 ix86_finalize_stack_frame_flags ();
8764 const struct ix86_frame &frame = cfun->machine->frame;
8766 m->fs.sp_realigned = stack_realign_fp;
8767 m->fs.sp_valid = stack_realign_fp
8768 || !frame_pointer_needed
8769 || crtl->sp_is_unchanging;
8770 gcc_assert (!m->fs.sp_valid
8771 || m->fs.sp_offset == frame.stack_pointer_offset);
8773 /* The FP must be valid if the frame pointer is present. */
8774 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
8775 gcc_assert (!m->fs.fp_valid
8776 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
8778 /* We must have *some* valid pointer to the stack frame. */
8779 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
8781 /* The DRAP is never valid at this point. */
8782 gcc_assert (!m->fs.drap_valid);
8784 /* See the comment about red zone and frame
8785 pointer usage in ix86_expand_prologue. */
8786 if (frame_pointer_needed && frame.red_zone_size)
8787 emit_insn (gen_memory_blockage ());
8789 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
8790 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
8792 /* Determine the CFA offset of the end of the red-zone. */
8793 m->fs.red_zone_offset = 0;
8794 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
8796 /* The red zone begins below the return address (and, in an exception
8797 handler, below the error code). */
8798 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
8800 /* When the register save area is in the aligned portion of
8801 the stack, determine the maximum runtime displacement that
8802 matches up with the aligned frame. */
8803 if (stack_realign_drap)
8804 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
8805 + UNITS_PER_WORD);
8808 HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
8810 /* Special care must be taken for the normal return case of a function
8811 using eh_return: the eax and edx registers are marked as saved, but
8812 not restored along this path. Adjust the save location to match. */
8813 if (crtl->calls_eh_return && style != 2)
8814 reg_save_offset -= 2 * UNITS_PER_WORD;
8816 /* EH_RETURN requires the use of moves to function properly. */
8817 if (crtl->calls_eh_return)
8818 restore_regs_via_mov = true;
8819 /* SEH requires the use of pops to identify the epilogue. */
8820 else if (TARGET_SEH)
8821 restore_regs_via_mov = false;
8822 /* If we're only restoring one register and sp cannot be used then
8823 use a move instruction to restore the register since it's
8824 less work than reloading sp and popping the register. */
8825 else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
8826 restore_regs_via_mov = true;
8827 else if (TARGET_EPILOGUE_USING_MOVE
8828 && cfun->machine->use_fast_prologue_epilogue
8829 && (frame.nregs > 1
8830 || m->fs.sp_offset != reg_save_offset))
8831 restore_regs_via_mov = true;
8832 else if (frame_pointer_needed
8833 && !frame.nregs
8834 && m->fs.sp_offset != reg_save_offset)
8835 restore_regs_via_mov = true;
8836 else if (frame_pointer_needed
8837 && TARGET_USE_LEAVE
8838 && cfun->machine->use_fast_prologue_epilogue
8839 && frame.nregs == 1)
8840 restore_regs_via_mov = true;
8841 else
8842 restore_regs_via_mov = false;
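/* At this point restore_regs_via_mov says whether the saved general
   registers are reloaded with MOVs instead of POPs; SSE registers, when
   present, are always restored with MOVs.  */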
8844 if (restore_regs_via_mov || frame.nsseregs)
8846 /* Ensure that the entire register save area is addressable via
8847 the stack pointer, if we will restore SSE regs via sp. */
8848 if (TARGET_64BIT
8849 && m->fs.sp_offset > 0x7fffffff
8850 && sp_valid_at (frame.stack_realign_offset + 1)
8851 && (frame.nsseregs + frame.nregs) != 0)
8853 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8854 GEN_INT (m->fs.sp_offset
8855 - frame.sse_reg_save_offset),
8856 style,
8857 m->fs.cfa_reg == stack_pointer_rtx);
8861 /* If there are any SSE registers to restore, then we have to do it
8862 via moves, since there's obviously no pop for SSE regs. */
8863 if (frame.nsseregs)
8864 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
8865 style == 2);
8867 if (m->call_ms2sysv)
8869 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
8871 /* We cannot use a tail-call for the stub if:
8872 1. We have to pop incoming args,
8873 2. We have additional int regs to restore, or
8874 3. A sibling call will be the tail-call, or
8875 4. We are emitting an eh_return_internal epilogue.
8877 TODO: Item 4 has not yet been tested!
8879 If any of the above are true, we will call the stub rather than
8880 jump to it. */
8881 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
8882 ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
8885 /* If using out-of-line stub that is a tail-call, then...*/
8886 if (m->call_ms2sysv && restore_stub_is_tail)
8888 /* TODO: paranoid tests. (remove eventually) */
8889 gcc_assert (m->fs.sp_valid);
8890 gcc_assert (!m->fs.sp_realigned);
8891 gcc_assert (!m->fs.fp_valid);
8892 gcc_assert (!m->fs.realigned);
8893 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
8894 gcc_assert (!crtl->drap_reg);
8895 gcc_assert (!frame.nregs);
8897 else if (restore_regs_via_mov)
8899 rtx t;
8901 if (frame.nregs)
8902 ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
8904 /* eh_return epilogues need %ecx added to the stack pointer. */
8905 if (style == 2)
8907 rtx sa = EH_RETURN_STACKADJ_RTX;
8908 rtx_insn *insn;
8910 /* %ecx can't be used for both DRAP register and eh_return. */
8911 if (crtl->drap_reg)
8912 gcc_assert (REGNO (crtl->drap_reg) != CX_REG);
8914 /* regparm nested functions don't work with eh_return. */
8915 gcc_assert (!ix86_static_chain_on_stack);
8917 if (frame_pointer_needed)
8919 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8920 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
8921 emit_insn (gen_rtx_SET (sa, t));
8923 /* NB: eh_return epilogues must restore the frame pointer
8924 in word_mode since the upper 32 bits of RBP register
8925 can have any values. */
8926 t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
8927 rtx frame_reg = gen_rtx_REG (word_mode,
8928 HARD_FRAME_POINTER_REGNUM);
8929 insn = emit_move_insn (frame_reg, t);
8931 /* Note that we use SA as a temporary CFA, as the return
8932 address is at the proper place relative to it. We
8933 pretend this happens at the FP restore insn because
8934 prior to this insn the FP would be stored at the wrong
8935 offset relative to SA, and after this insn we have no
8936 other reasonable register to use for the CFA. We don't
8937 bother resetting the CFA to the SP for the duration of
8938 the return insn, unless the control flow instrumentation
8939 is done. In this case the SP is used later and we have
8940 to reset CFA to SP. */
8941 add_reg_note (insn, REG_CFA_DEF_CFA,
8942 plus_constant (Pmode, sa, UNITS_PER_WORD));
8943 ix86_add_queued_cfa_restore_notes (insn);
8944 add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
8945 RTX_FRAME_RELATED_P (insn) = 1;
8947 m->fs.cfa_reg = sa;
8948 m->fs.cfa_offset = UNITS_PER_WORD;
8949 m->fs.fp_valid = false;
8951 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8952 const0_rtx, style,
8953 flag_cf_protection);
8955 else
8957 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8958 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
8959 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
8960 ix86_add_queued_cfa_restore_notes (insn);
8962 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
8963 if (m->fs.cfa_offset != UNITS_PER_WORD)
8965 m->fs.cfa_offset = UNITS_PER_WORD;
8966 add_reg_note (insn, REG_CFA_DEF_CFA,
8967 plus_constant (Pmode, stack_pointer_rtx,
8968 UNITS_PER_WORD));
8969 RTX_FRAME_RELATED_P (insn) = 1;
8972 m->fs.sp_offset = UNITS_PER_WORD;
8973 m->fs.sp_valid = true;
8974 m->fs.sp_realigned = false;
8977 else
8979 /* SEH requires that the function end with (1) a stack adjustment
8980 if necessary, (2) a sequence of pops, and (3) a return or
8981 jump instruction. Prevent insns from the function body from
8982 being scheduled into this sequence. */
8983 if (TARGET_SEH)
8985 /* Prevent a catch region from being adjacent to the standard
8986 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
8987 nor several other flags that would be interesting to test are
8988 set up yet. */
8989 if (flag_non_call_exceptions)
8990 emit_insn (gen_nops (const1_rtx));
8991 else
8992 emit_insn (gen_blockage ());
8995 /* First step is to deallocate the stack frame so that we can
8996 pop the registers. If the stack pointer was realigned, it needs
8997 to be restored now. Also do it on SEH target for very large
8998 frame as the emitted instructions aren't allowed by the ABI
8999 in epilogues. */
9000 if (!m->fs.sp_valid || m->fs.sp_realigned
9001 || (TARGET_SEH
9002 && (m->fs.sp_offset - reg_save_offset
9003 >= SEH_MAX_FRAME_SIZE)))
9005 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
9006 GEN_INT (m->fs.fp_offset
9007 - reg_save_offset),
9008 style, false);
9010 else if (m->fs.sp_offset != reg_save_offset)
9012 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9013 GEN_INT (m->fs.sp_offset
9014 - reg_save_offset),
9015 style,
9016 m->fs.cfa_reg == stack_pointer_rtx);
9019 ix86_emit_restore_regs_using_pop ();
9022 /* If we used a frame pointer and haven't already got rid of it,
9023 then do so now. */
9024 if (m->fs.fp_valid)
9026 /* If the stack pointer is valid and pointing at the frame
9027 pointer store address, then we only need a pop. */
9028 if (sp_valid_at (frame.hfp_save_offset)
9029 && m->fs.sp_offset == frame.hfp_save_offset)
9030 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
9031 /* Leave results in shorter dependency chains on CPUs that are
9032 able to grok it fast. */
9033 else if (TARGET_USE_LEAVE
9034 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
9035 || !cfun->machine->use_fast_prologue_epilogue)
9036 ix86_emit_leave (NULL);
9037 else
9039 pro_epilogue_adjust_stack (stack_pointer_rtx,
9040 hard_frame_pointer_rtx,
9041 const0_rtx, style, !using_drap);
9042 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
9046 if (using_drap)
9048 int param_ptr_offset = UNITS_PER_WORD;
9049 rtx_insn *insn;
9051 gcc_assert (stack_realign_drap);
9053 if (ix86_static_chain_on_stack)
9054 param_ptr_offset += UNITS_PER_WORD;
9055 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9056 param_ptr_offset += UNITS_PER_WORD;
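/* param_ptr_offset now covers the replicated return address plus any
   static chain and DRAP register pushes made by the prologue, so the
   stack pointer can be recovered directly from the DRAP register.  */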
9058 insn = emit_insn (gen_rtx_SET
9059 (stack_pointer_rtx,
9060 plus_constant (Pmode, crtl->drap_reg,
9061 -param_ptr_offset)));
9062 m->fs.cfa_reg = stack_pointer_rtx;
9063 m->fs.cfa_offset = param_ptr_offset;
9064 m->fs.sp_offset = param_ptr_offset;
9065 m->fs.realigned = false;
9067 add_reg_note (insn, REG_CFA_DEF_CFA,
9068 plus_constant (Pmode, stack_pointer_rtx,
9069 param_ptr_offset));
9070 RTX_FRAME_RELATED_P (insn) = 1;
9072 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9073 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
9076 /* At this point the stack pointer must be valid, and we must have
9077 restored all of the registers. We may not have deallocated the
9078 entire stack frame. We've delayed this until now because it may
9079 be possible to merge the local stack deallocation with the
9080 deallocation forced by ix86_static_chain_on_stack. */
9081 gcc_assert (m->fs.sp_valid);
9082 gcc_assert (!m->fs.sp_realigned);
9083 gcc_assert (!m->fs.fp_valid);
9084 gcc_assert (!m->fs.realigned);
9085 if (m->fs.sp_offset != UNITS_PER_WORD)
9087 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9088 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
9089 style, true);
9091 else
9092 ix86_add_queued_cfa_restore_notes (get_last_insn ());
9094 /* Sibcall epilogues don't want a return instruction. */
9095 if (style == 0)
9097 m->fs = frame_state_save;
9098 return;
9101 if (cfun->machine->func_type != TYPE_NORMAL)
9102 emit_jump_insn (gen_interrupt_return ());
9103 else if (crtl->args.pops_args && crtl->args.size)
9105 rtx popc = GEN_INT (crtl->args.pops_args);
9107 /* i386 can only pop 64K bytes. If asked to pop more, pop return
9108 address, do explicit add, and jump indirectly to the caller. */
9110 if (crtl->args.pops_args >= 65536)
9112 rtx ecx = gen_rtx_REG (SImode, CX_REG);
9113 rtx_insn *insn;
9115 /* There is no "pascal" calling convention in any 64bit ABI. */
9116 gcc_assert (!TARGET_64BIT);
9118 insn = emit_insn (gen_pop (ecx));
9119 m->fs.cfa_offset -= UNITS_PER_WORD;
9120 m->fs.sp_offset -= UNITS_PER_WORD;
9122 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9123 x = gen_rtx_SET (stack_pointer_rtx, x);
9124 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9125 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9126 RTX_FRAME_RELATED_P (insn) = 1;
9128 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9129 popc, -1, true);
9130 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9132 else
9133 emit_jump_insn (gen_simple_return_pop_internal (popc));
9135 else if (!m->call_ms2sysv || !restore_stub_is_tail)
9137 /* In case of a return from EH, a simple return cannot be used
9138 as the return address will be compared with a shadow stack
9139 return address. Use an indirect jump instead. */
9140 if (style == 2 && flag_cf_protection)
9142 /* Register used in indirect jump must be in word_mode. But
9143 Pmode may not be the same as word_mode for x32. */
9144 rtx ecx = gen_rtx_REG (word_mode, CX_REG);
9145 rtx_insn *insn;
9147 insn = emit_insn (gen_pop (ecx));
9148 m->fs.cfa_offset -= UNITS_PER_WORD;
9149 m->fs.sp_offset -= UNITS_PER_WORD;
9151 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9152 x = gen_rtx_SET (stack_pointer_rtx, x);
9153 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9154 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9155 RTX_FRAME_RELATED_P (insn) = 1;
9157 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9159 else
9160 emit_jump_insn (gen_simple_return_internal ());
9163 /* Restore the state back to the state from the prologue,
9164 so that it's correct for the next epilogue. */
9165 m->fs = frame_state_save;
9168 /* Reset from the function's potential modifications. */
9170 static void
9171 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
9173 if (pic_offset_table_rtx
9174 && !ix86_use_pseudo_pic_reg ())
9175 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
9177 if (TARGET_MACHO)
9179 rtx_insn *insn = get_last_insn ();
9180 rtx_insn *deleted_debug_label = NULL;
9182 /* Mach-O doesn't support labels at the end of objects, so if
9183 it looks like we might want one, take special action.
9184 First, collect any sequence of deleted debug labels. */
9185 while (insn
9186 && NOTE_P (insn)
9187 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9189 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
9190 notes only, instead set their CODE_LABEL_NUMBER to -1,
9191 otherwise there would be code generation differences
9192 in between -g and -g0. */
9193 if (NOTE_P (insn) && NOTE_KIND (insn)
9194 == NOTE_INSN_DELETED_DEBUG_LABEL)
9195 deleted_debug_label = insn;
9196 insn = PREV_INSN (insn);
9199 /* If we have:
9200 label:
9201 barrier
9202 then this needs to be detected, so skip past the barrier. */
9204 if (insn && BARRIER_P (insn))
9205 insn = PREV_INSN (insn);
9207 /* Up to now we've only seen notes or barriers. */
9208 if (insn)
9210 if (LABEL_P (insn)
9211 || (NOTE_P (insn)
9212 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
9213 /* Trailing label. */
9214 fputs ("\tnop\n", file);
9215 else if (cfun && ! cfun->is_thunk)
9217 /* See if we have a completely empty function body, skipping
9218 the special case of the picbase thunk emitted as asm. */
9219 while (insn && ! INSN_P (insn))
9220 insn = PREV_INSN (insn);
9221 /* If we don't find any insns, we've got an empty function body;
9222 i.e. completely empty - without a return or branch. This is
9223 taken as the case where a function body has been removed
9224 because it contains an inline __builtin_unreachable(). GCC
9225 declares that reaching __builtin_unreachable() means UB so
9226 we're not obliged to do anything special; however, we want
9227 non-zero-sized function bodies. To meet this, and help the
9228 user out, let's trap the case. */
9229 if (insn == NULL)
9230 fputs ("\tud2\n", file);
9233 else if (deleted_debug_label)
9234 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
9235 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
9236 CODE_LABEL_NUMBER (insn) = -1;
9240 /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */
9242 void
9243 ix86_print_patchable_function_entry (FILE *file,
9244 unsigned HOST_WIDE_INT patch_area_size,
9245 bool record_p)
9247 if (cfun->machine->function_label_emitted)
9249 /* NB: When ix86_print_patchable_function_entry is called after
9250 function table has been emitted, we have inserted or queued
9251 a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
9252 place. There is nothing to do here. */
9253 return;
9256 default_print_patchable_function_entry (file, patch_area_size,
9257 record_p);
9260 /* Output patchable area. NB: default_print_patchable_function_entry
9261 isn't available in i386.md. */
9263 void
9264 ix86_output_patchable_area (unsigned int patch_area_size,
9265 bool record_p)
9267 default_print_patchable_function_entry (asm_out_file,
9268 patch_area_size,
9269 record_p);
9272 /* Return a scratch register to use in the split stack prologue. The
9273 split stack prologue is used for -fsplit-stack. It is the first
9274 instructions in the function, even before the regular prologue.
9275 The scratch register can be any caller-saved register which is not
9276 used for parameters or for the static chain. */
9278 static unsigned int
9279 split_stack_prologue_scratch_regno (void)
9281 if (TARGET_64BIT)
9282 return R11_REG;
9283 else
9285 bool is_fastcall, is_thiscall;
9286 int regparm;
9288 is_fastcall = (lookup_attribute ("fastcall",
9289 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9290 != NULL);
9291 is_thiscall = (lookup_attribute ("thiscall",
9292 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9293 != NULL);
9294 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
9296 if (is_fastcall)
9298 if (DECL_STATIC_CHAIN (cfun->decl))
9300 sorry ("%<-fsplit-stack%> does not support fastcall with "
9301 "nested function");
9302 return INVALID_REGNUM;
9304 return AX_REG;
9306 else if (is_thiscall)
9308 if (!DECL_STATIC_CHAIN (cfun->decl))
9309 return DX_REG;
9310 return AX_REG;
9312 else if (regparm < 3)
9314 if (!DECL_STATIC_CHAIN (cfun->decl))
9315 return CX_REG;
9316 else
9318 if (regparm >= 2)
9320 sorry ("%<-fsplit-stack%> does not support 2 register "
9321 "parameters for a nested function");
9322 return INVALID_REGNUM;
9324 return DX_REG;
9327 else
9329 /* FIXME: We could make this work by pushing a register
9330 around the addition and comparison. */
9331 sorry ("%<-fsplit-stack%> does not support 3 register parameters");
9332 return INVALID_REGNUM;
9337 /* A SYMBOL_REF for the function which allocates new stackspace for
9338 -fsplit-stack. */
9340 static GTY(()) rtx split_stack_fn;
9342 /* A SYMBOL_REF for the __morestack function when using the large
9343 model. */
9345 static GTY(()) rtx split_stack_fn_large;
9347 /* Return location of the stack guard value in the TLS block. */
9350 ix86_split_stack_guard (void)
9352 int offset;
9353 addr_space_t as = DEFAULT_TLS_SEG_REG;
9354 rtx r;
9356 gcc_assert (flag_split_stack);
9358 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
9359 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
9360 #else
9361 gcc_unreachable ();
9362 #endif
9364 r = GEN_INT (offset);
9365 r = gen_const_mem (Pmode, r);
9366 set_mem_addr_space (r, as);
9368 return r;
9371 /* Handle -fsplit-stack. These are the first instructions in the
9372 function, even before the regular prologue. */
9374 void
9375 ix86_expand_split_stack_prologue (void)
9377 HOST_WIDE_INT allocate;
9378 unsigned HOST_WIDE_INT args_size;
9379 rtx_code_label *label;
9380 rtx limit, current, allocate_rtx, call_fusage;
9381 rtx_insn *call_insn;
9382 rtx scratch_reg = NULL_RTX;
9383 rtx_code_label *varargs_label = NULL;
9384 rtx fn;
9386 gcc_assert (flag_split_stack && reload_completed);
9388 ix86_finalize_stack_frame_flags ();
9389 struct ix86_frame &frame = cfun->machine->frame;
9390 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
9392 /* This is the label we will branch to if we have enough stack
9393 space. We expect the basic block reordering pass to reverse this
9394 branch if optimizing, so that we branch in the unlikely case. */
9395 label = gen_label_rtx ();
9397 /* We need to compare the stack pointer minus the frame size with
9398 the stack boundary in the TCB. The stack boundary always gives
9399 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
9400 can compare directly. Otherwise we need to do an addition. */
9402 limit = ix86_split_stack_guard ();
9404 if (allocate < SPLIT_STACK_AVAILABLE)
9405 current = stack_pointer_rtx;
9406 else
9408 unsigned int scratch_regno;
9409 rtx offset;
9411 /* We need a scratch register to hold the stack pointer minus
9412 the required frame size. Since this is the very start of the
9413 function, the scratch register can be any caller-saved
9414 register which is not used for parameters. */
9415 offset = GEN_INT (- allocate);
9416 scratch_regno = split_stack_prologue_scratch_regno ();
9417 if (scratch_regno == INVALID_REGNUM)
9418 return;
9419 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
9420 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
9422 /* We don't use gen_add in this case because it will
9423 want to split to lea, but when not optimizing the insn
9424 will not be split after this point. */
9425 emit_insn (gen_rtx_SET (scratch_reg,
9426 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9427 offset)));
9429 else
9431 emit_move_insn (scratch_reg, offset);
9432 emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
9434 current = scratch_reg;
9437 ix86_expand_branch (GEU, current, limit, label);
9438 rtx_insn *jump_insn = get_last_insn ();
9439 JUMP_LABEL (jump_insn) = label;
9441 /* Mark the jump as very likely to be taken. */
9442 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
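/* Schematically, the check above expands to something like
   (illustrative only, 64-bit, default TLS guard slot):
	cmpq	%fs:OFFSET, %rsp
	jae	.Lenough
   or, when the frame exceeds SPLIT_STACK_AVAILABLE,
	leaq	-FRAMESIZE(%rsp), %r11
	cmpq	%fs:OFFSET, %r11
	jae	.Lenough
   with the fall-through path performing the __morestack call emitted
   below.  */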
9444 if (split_stack_fn == NULL_RTX)
9446 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
9447 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
9449 fn = split_stack_fn;
9451 /* Get more stack space. We pass in the desired stack space and the
9452 size of the arguments to copy to the new stack. In 32-bit mode
9453 we push the parameters; __morestack will return on a new stack
9454 anyhow. In 64-bit mode we pass the parameters in r10 and
9455 r11. */
9456 allocate_rtx = GEN_INT (allocate);
9457 args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
9458 call_fusage = NULL_RTX;
9459 rtx pop = NULL_RTX;
9460 if (TARGET_64BIT)
9462 rtx reg10, reg11;
9464 reg10 = gen_rtx_REG (Pmode, R10_REG);
9465 reg11 = gen_rtx_REG (Pmode, R11_REG);
9467 /* If this function uses a static chain, it will be in %r10.
9468 Preserve it across the call to __morestack. */
9469 if (DECL_STATIC_CHAIN (cfun->decl))
9471 rtx rax;
9473 rax = gen_rtx_REG (word_mode, AX_REG);
9474 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
9475 use_reg (&call_fusage, rax);
9478 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
9479 && !TARGET_PECOFF)
9481 HOST_WIDE_INT argval;
9483 gcc_assert (Pmode == DImode);
9484 /* When using the large model we need to load the address
9485 into a register, and we've run out of registers. So we
9486 switch to a different calling convention, and we call a
9487 different function: __morestack_large. We pass the
9488 argument size in the upper 32 bits of r10 and pass the
9489 frame size in the lower 32 bits. */
9490 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
9491 gcc_assert ((args_size & 0xffffffff) == args_size);
9493 if (split_stack_fn_large == NULL_RTX)
9495 split_stack_fn_large
9496 = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
9497 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
9499 if (ix86_cmodel == CM_LARGE_PIC)
9501 rtx_code_label *label;
9502 rtx x;
9504 label = gen_label_rtx ();
9505 emit_label (label);
9506 LABEL_PRESERVE_P (label) = 1;
9507 emit_insn (gen_set_rip_rex64 (reg10, label));
9508 emit_insn (gen_set_got_offset_rex64 (reg11, label));
9509 emit_insn (gen_add2_insn (reg10, reg11));
9510 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
9511 UNSPEC_GOT);
9512 x = gen_rtx_CONST (Pmode, x);
9513 emit_move_insn (reg11, x);
9514 x = gen_rtx_PLUS (Pmode, reg10, reg11);
9515 x = gen_const_mem (Pmode, x);
9516 emit_move_insn (reg11, x);
9518 else
9519 emit_move_insn (reg11, split_stack_fn_large);
9521 fn = reg11;
9523 argval = ((args_size << 16) << 16) + allocate;
9524 emit_move_insn (reg10, GEN_INT (argval));
9526 else
9528 emit_move_insn (reg10, allocate_rtx);
9529 emit_move_insn (reg11, GEN_INT (args_size));
9530 use_reg (&call_fusage, reg11);
9533 use_reg (&call_fusage, reg10);
9535 else
9537 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
9538 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
9539 insn = emit_insn (gen_push (allocate_rtx));
9540 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
9541 pop = GEN_INT (2 * UNITS_PER_WORD);
9543 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
9544 GEN_INT (UNITS_PER_WORD), constm1_rtx,
9545 pop, false);
9546 add_function_usage_to (call_insn, call_fusage);
9547 if (!TARGET_64BIT)
9548 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
9549 /* Indicate that this function can't jump to non-local gotos. */
9550 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
9552 /* In order to make call/return prediction work right, we now need
9553 to execute a return instruction. See
9554 libgcc/config/i386/morestack.S for the details on how this works.
9556 For flow purposes gcc must not see this as a return
9557 instruction--we need control flow to continue at the subsequent
9558 label. Therefore, we use an unspec. */
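/* A sketch of the effect, not a quote of the insn's output template:
   split_stack_return assembles to a plain "ret" (or "ret $N" when the
   function pops N bytes of its arguments), but because it is an unspec
   the CFG still continues at the label below.  */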
9559 gcc_assert (crtl->args.pops_args < 65536);
9560 rtx_insn *ret_insn
9561 = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
9563 if ((flag_cf_protection & CF_BRANCH))
9565 /* Insert ENDBR since __morestack will jump back here via indirect
9566 call. */
9567 rtx cet_eb = gen_nop_endbr ();
9568 emit_insn_after (cet_eb, ret_insn);
9571 /* If we are in 64-bit mode and this function uses a static chain,
9572 we saved %r10 in %rax before calling __morestack. */
9573 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
9574 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
9575 gen_rtx_REG (word_mode, AX_REG));
9577 /* If this function calls va_start, we need to store a pointer to
9578 the arguments on the old stack, because they may not have been
9579 all copied to the new stack. At this point the old stack can be
9580 found at the frame pointer value used by __morestack, because
9581 __morestack has set that up before calling back to us. Here we
9582 store that pointer in a scratch register, and in
9583 ix86_expand_prologue we store the scratch register in a stack
9584 slot. */
9585 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9587 unsigned int scratch_regno;
9588 rtx frame_reg;
9589 int words;
9591 scratch_regno = split_stack_prologue_scratch_regno ();
9592 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
9593 frame_reg = gen_rtx_REG (Pmode, BP_REG);
9595 /* 64-bit:
9596 fp -> old fp value
9597 return address within this function
9598 return address of caller of this function
9599 stack arguments
9600 So we add three words to get to the stack arguments.
9602 32-bit:
9603 fp -> old fp value
9604 return address within this function
9605 first argument to __morestack
9606 second argument to __morestack
9607 return address of caller of this function
9608 stack arguments
9609 So we add five words to get to the stack arguments. */
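/* For illustration, assuming the usual word sizes: the scratch register
   below becomes frame_reg + 24 in 64-bit mode and frame_reg + 20 in
   32-bit mode.  */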
9611 words = TARGET_64BIT ? 3 : 5;
9612 emit_insn (gen_rtx_SET (scratch_reg,
9613 plus_constant (Pmode, frame_reg,
9614 words * UNITS_PER_WORD)));
9616 varargs_label = gen_label_rtx ();
9617 emit_jump_insn (gen_jump (varargs_label));
9618 JUMP_LABEL (get_last_insn ()) = varargs_label;
9620 emit_barrier ();
9623 emit_label (label);
9624 LABEL_NUSES (label) = 1;
9626 /* If this function calls va_start, we now have to set the scratch
9627 register for the case where we do not call __morestack. In this
9628 case we need to set it based on the stack pointer. */
9629 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9631 emit_insn (gen_rtx_SET (scratch_reg,
9632 plus_constant (Pmode, stack_pointer_rtx,
9633 UNITS_PER_WORD)));
9635 emit_label (varargs_label);
9636 LABEL_NUSES (varargs_label) = 1;
9640 /* We may have to tell the dataflow pass that the split stack prologue
9641 is initializing a scratch register. */
9643 static void
9644 ix86_live_on_entry (bitmap regs)
9646 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9648 gcc_assert (flag_split_stack);
9649 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
9653 /* Extract the parts of an RTL expression that is a valid memory address
9654 for an instruction. Return 0 if the structure of the address is
9655 grossly off. Return -1 if the address contains ASHIFT, so it is not
9656 strictly valid, but still used for computing length of lea instruction. */
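/* An illustrative example (not from this file): the address
     (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 8))
   decomposes into base = A, index = B, scale = 4 and disp = 8, i.e. the
   operand of something like "movl 8(%eax,%ebx,4), %ecx".  */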
9659 ix86_decompose_address (rtx addr, struct ix86_address *out)
9661 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
9662 rtx base_reg, index_reg;
9663 HOST_WIDE_INT scale = 1;
9664 rtx scale_rtx = NULL_RTX;
9665 rtx tmp;
9666 int retval = 1;
9667 addr_space_t seg = ADDR_SPACE_GENERIC;
9669 /* Allow zero-extended SImode addresses;
9670 they will be emitted with the addr32 prefix. */
9671 if (TARGET_64BIT && GET_MODE (addr) == DImode)
9673 if (GET_CODE (addr) == ZERO_EXTEND
9674 && GET_MODE (XEXP (addr, 0)) == SImode)
9676 addr = XEXP (addr, 0);
9677 if (CONST_INT_P (addr))
9678 return 0;
9680 else if (GET_CODE (addr) == AND
9681 && const_32bit_mask (XEXP (addr, 1), DImode))
9683 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
9684 if (addr == NULL_RTX)
9685 return 0;
9687 if (CONST_INT_P (addr))
9688 return 0;
9692 /* Allow SImode subregs of DImode addresses;
9693 they will be emitted with the addr32 prefix. */
9694 if (TARGET_64BIT && GET_MODE (addr) == SImode)
9696 if (SUBREG_P (addr)
9697 && GET_MODE (SUBREG_REG (addr)) == DImode)
9699 addr = SUBREG_REG (addr);
9700 if (CONST_INT_P (addr))
9701 return 0;
9705 if (REG_P (addr))
9706 base = addr;
9707 else if (SUBREG_P (addr))
9709 if (REG_P (SUBREG_REG (addr)))
9710 base = addr;
9711 else
9712 return 0;
9714 else if (GET_CODE (addr) == PLUS)
9716 rtx addends[4], op;
9717 int n = 0, i;
9719 op = addr;
9722 if (n >= 4)
9723 return 0;
9724 addends[n++] = XEXP (op, 1);
9725 op = XEXP (op, 0);
9727 while (GET_CODE (op) == PLUS);
9728 if (n >= 4)
9729 return 0;
9730 addends[n] = op;
9732 for (i = n; i >= 0; --i)
9734 op = addends[i];
9735 switch (GET_CODE (op))
9737 case MULT:
9738 if (index)
9739 return 0;
9740 index = XEXP (op, 0);
9741 scale_rtx = XEXP (op, 1);
9742 break;
9744 case ASHIFT:
9745 if (index)
9746 return 0;
9747 index = XEXP (op, 0);
9748 tmp = XEXP (op, 1);
9749 if (!CONST_INT_P (tmp))
9750 return 0;
9751 scale = INTVAL (tmp);
9752 if ((unsigned HOST_WIDE_INT) scale > 3)
9753 return 0;
9754 scale = 1 << scale;
9755 break;
9757 case ZERO_EXTEND:
9758 op = XEXP (op, 0);
9759 if (GET_CODE (op) != UNSPEC)
9760 return 0;
9761 /* FALLTHRU */
9763 case UNSPEC:
9764 if (XINT (op, 1) == UNSPEC_TP
9765 && TARGET_TLS_DIRECT_SEG_REFS
9766 && seg == ADDR_SPACE_GENERIC)
9767 seg = DEFAULT_TLS_SEG_REG;
9768 else
9769 return 0;
9770 break;
9772 case SUBREG:
9773 if (!REG_P (SUBREG_REG (op)))
9774 return 0;
9775 /* FALLTHRU */
9777 case REG:
9778 if (!base)
9779 base = op;
9780 else if (!index)
9781 index = op;
9782 else
9783 return 0;
9784 break;
9786 case CONST:
9787 case CONST_INT:
9788 case SYMBOL_REF:
9789 case LABEL_REF:
9790 if (disp)
9791 return 0;
9792 disp = op;
9793 break;
9795 default:
9796 return 0;
9800 else if (GET_CODE (addr) == MULT)
9802 index = XEXP (addr, 0); /* index*scale */
9803 scale_rtx = XEXP (addr, 1);
9805 else if (GET_CODE (addr) == ASHIFT)
9807 /* We're called for lea too, which implements ashift on occasion. */
9808 index = XEXP (addr, 0);
9809 tmp = XEXP (addr, 1);
9810 if (!CONST_INT_P (tmp))
9811 return 0;
9812 scale = INTVAL (tmp);
9813 if ((unsigned HOST_WIDE_INT) scale > 3)
9814 return 0;
9815 scale = 1 << scale;
9816 retval = -1;
9818 else
9819 disp = addr; /* displacement */
9821 if (index)
9823 if (REG_P (index))
9825 else if (SUBREG_P (index)
9826 && REG_P (SUBREG_REG (index)))
9828 else
9829 return 0;
9832 /* Extract the integral value of scale. */
9833 if (scale_rtx)
9835 if (!CONST_INT_P (scale_rtx))
9836 return 0;
9837 scale = INTVAL (scale_rtx);
9840 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
9841 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
9843 /* Avoid useless 0 displacement. */
9844 if (disp == const0_rtx && (base || index))
9845 disp = NULL_RTX;
9847 /* Allow arg pointer and stack pointer as index if there is no scaling. */
9848 if (base_reg && index_reg && scale == 1
9849 && (REGNO (index_reg) == ARG_POINTER_REGNUM
9850 || REGNO (index_reg) == FRAME_POINTER_REGNUM
9851 || REGNO (index_reg) == SP_REG))
9853 std::swap (base, index);
9854 std::swap (base_reg, index_reg);
9857 /* Special case: %ebp cannot be encoded as a base without a displacement.
9858 Similarly %r13. */
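/* (Roughly: in the ModR/M/SIB encoding, mod == 00 with a base field of
   101 means "disp32, no base", so these registers can be used as a base
   only together with an explicit displacement.)  */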
9859 if (!disp && base_reg
9860 && (REGNO (base_reg) == ARG_POINTER_REGNUM
9861 || REGNO (base_reg) == FRAME_POINTER_REGNUM
9862 || REGNO (base_reg) == BP_REG
9863 || REGNO (base_reg) == R13_REG))
9864 disp = const0_rtx;
9866 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
9867 Avoid this by transforming to [%esi+0].
9868 Reload calls address legitimization without cfun defined, so we need
9869 to test cfun for being non-NULL. */
9870 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
9871 && base_reg && !index_reg && !disp
9872 && REGNO (base_reg) == SI_REG)
9873 disp = const0_rtx;
9875 /* Special case: encode reg+reg instead of reg*2. */
9876 if (!base && index && scale == 2)
9877 base = index, base_reg = index_reg, scale = 1;
9879 /* Special case: scaling cannot be encoded without base or displacement. */
9880 if (!base && !disp && index && scale != 1)
9881 disp = const0_rtx;
9883 out->base = base;
9884 out->index = index;
9885 out->disp = disp;
9886 out->scale = scale;
9887 out->seg = seg;
9889 return retval;
9892 /* Return cost of the memory address x.
9893 For i386, it is better to use a complex address than let gcc copy
9894 the address into a reg and make a new pseudo. But not if the address
9895 requires two regs - that would mean more pseudos with longer
9896 lifetimes. */
9897 static int
9898 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
9900 struct ix86_address parts;
9901 int cost = 1;
9902 int ok = ix86_decompose_address (x, &parts);
9904 gcc_assert (ok);
9906 if (parts.base && SUBREG_P (parts.base))
9907 parts.base = SUBREG_REG (parts.base);
9908 if (parts.index && SUBREG_P (parts.index))
9909 parts.index = SUBREG_REG (parts.index);
9911 /* Attempt to minimize number of registers in the address by increasing
9912 address cost for each used register. We don't increase address cost
9913 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
9914 is not invariant itself it most likely means that base or index is not
9915 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
9916 which is not profitable for x86. */
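/* Roughly: an address built from two not-yet-allocated pseudos such as
   (plus (reg 90) (reg 91)) gets cost 3 here, a single-pseudo address
   gets cost 2, and a plain hard-register address keeps the base cost
   of 1.  (The register numbers are illustrative.)  */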
9917 if (parts.base
9918 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
9919 && (current_pass->type == GIMPLE_PASS
9920 || !pic_offset_table_rtx
9921 || !REG_P (parts.base)
9922 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
9923 cost++;
9925 if (parts.index
9926 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
9927 && (current_pass->type == GIMPLE_PASS
9928 || !pic_offset_table_rtx
9929 || !REG_P (parts.index)
9930 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
9931 cost++;
9933 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
9934 since its predecode logic can't detect the length of instructions
9935 and it falls back to vector decoding. Increase the cost of such
9936 addresses here. The penalty is at least 2 cycles. It may be worthwhile
9937 to split such addresses or even refuse such addresses at all.
9939 The following addressing modes are affected:
9940 [base+scale*index]
9941 [scale*index+disp]
9942 [base+index]
9944 The first and last cases may be avoidable by explicitly coding the zero in
9945 the memory address, but I don't have an AMD K6 machine handy to check this
9946 theory. */
9948 if (TARGET_K6
9949 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
9950 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
9951 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
9952 cost += 10;
9954 return cost;
9957 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
9958 this is used to form addresses to local data when -fPIC is in
9959 use. */
9961 static bool
9962 darwin_local_data_pic (rtx disp)
9964 return (GET_CODE (disp) == UNSPEC
9965 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
9968 /* True if operand X should be loaded from GOT. */
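/* For illustration (x86-64, not taken from this file): with -fno-plt,
   or for a function declared with __attribute__((noplt)), a call to an
   undefined function foo is emitted as "call *foo@GOTPCREL(%rip)"
   instead of going through the PLT.  */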
9970 bool
9971 ix86_force_load_from_GOT_p (rtx x)
9973 return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
9974 && !TARGET_PECOFF && !TARGET_MACHO
9975 && !flag_pic
9976 && ix86_cmodel != CM_LARGE
9977 && GET_CODE (x) == SYMBOL_REF
9978 && SYMBOL_REF_FUNCTION_P (x)
9979 && (!flag_plt
9980 || (SYMBOL_REF_DECL (x)
9981 && lookup_attribute ("noplt",
9982 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))
9983 && !SYMBOL_REF_LOCAL_P (x));
9986 /* Determine if a given RTX is a valid constant. We already know this
9987 satisfies CONSTANT_P. */
9989 static bool
9990 ix86_legitimate_constant_p (machine_mode mode, rtx x)
9992 switch (GET_CODE (x))
9994 case CONST:
9995 x = XEXP (x, 0);
9997 if (GET_CODE (x) == PLUS)
9999 if (!CONST_INT_P (XEXP (x, 1)))
10000 return false;
10001 x = XEXP (x, 0);
10004 if (TARGET_MACHO && darwin_local_data_pic (x))
10005 return true;
10007 /* Only some unspecs are valid as "constants". */
10008 if (GET_CODE (x) == UNSPEC)
10009 switch (XINT (x, 1))
10011 case UNSPEC_GOT:
10012 case UNSPEC_GOTOFF:
10013 case UNSPEC_PLTOFF:
10014 return TARGET_64BIT;
10015 case UNSPEC_TPOFF:
10016 case UNSPEC_NTPOFF:
10017 x = XVECEXP (x, 0, 0);
10018 return (GET_CODE (x) == SYMBOL_REF
10019 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10020 case UNSPEC_DTPOFF:
10021 x = XVECEXP (x, 0, 0);
10022 return (GET_CODE (x) == SYMBOL_REF
10023 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10024 default:
10025 return false;
10028 /* We must have drilled down to a symbol. */
10029 if (GET_CODE (x) == LABEL_REF)
10030 return true;
10031 if (GET_CODE (x) != SYMBOL_REF)
10032 return false;
10033 /* FALLTHRU */
10035 case SYMBOL_REF:
10036 /* TLS symbols are never valid. */
10037 if (SYMBOL_REF_TLS_MODEL (x))
10038 return false;
10040 /* DLLIMPORT symbols are never valid. */
10041 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10042 && SYMBOL_REF_DLLIMPORT_P (x))
10043 return false;
10045 #if TARGET_MACHO
10046 /* mdynamic-no-pic */
10047 if (MACHO_DYNAMIC_NO_PIC_P)
10048 return machopic_symbol_defined_p (x);
10049 #endif
10051 /* External function address should be loaded
10052 via the GOT slot to avoid PLT. */
10053 if (ix86_force_load_from_GOT_p (x))
10054 return false;
10056 break;
10058 CASE_CONST_SCALAR_INT:
10059 switch (mode)
10061 case E_TImode:
10062 if (TARGET_64BIT)
10063 return true;
10064 /* FALLTHRU */
10065 case E_OImode:
10066 case E_XImode:
10067 if (!standard_sse_constant_p (x, mode))
10068 return false;
10069 default:
10070 break;
10072 break;
10074 case CONST_VECTOR:
10075 if (!standard_sse_constant_p (x, mode))
10076 return false;
10078 default:
10079 break;
10082 /* Otherwise we handle everything else in the move patterns. */
10083 return true;
10086 /* Determine if it's legal to put X into the constant pool. This
10087 is not possible for the address of thread-local symbols, which
10088 is checked above. */
10090 static bool
10091 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
10093 /* We can put any immediate constant in memory. */
10094 switch (GET_CODE (x))
10096 CASE_CONST_ANY:
10097 return false;
10099 default:
10100 break;
10103 return !ix86_legitimate_constant_p (mode, x);
10106 /* Return true if the symbol is marked as dllimport or as a stub
10107 variable, otherwise false. */
10109 static bool
10110 is_imported_p (rtx x)
10112 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
10113 || GET_CODE (x) != SYMBOL_REF)
10114 return false;
10116 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
10120 /* Nonzero if the constant value X is a legitimate general operand
10121 when generating PIC code. It is given that flag_pic is on and
10122 that X satisfies CONSTANT_P. */
10124 bool
10125 legitimate_pic_operand_p (rtx x)
10127 rtx inner;
10129 switch (GET_CODE (x))
10131 case CONST:
10132 inner = XEXP (x, 0);
10133 if (GET_CODE (inner) == PLUS
10134 && CONST_INT_P (XEXP (inner, 1)))
10135 inner = XEXP (inner, 0);
10137 /* Only some unspecs are valid as "constants". */
10138 if (GET_CODE (inner) == UNSPEC)
10139 switch (XINT (inner, 1))
10141 case UNSPEC_GOT:
10142 case UNSPEC_GOTOFF:
10143 case UNSPEC_PLTOFF:
10144 return TARGET_64BIT;
10145 case UNSPEC_TPOFF:
10146 x = XVECEXP (inner, 0, 0);
10147 return (GET_CODE (x) == SYMBOL_REF
10148 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10149 case UNSPEC_MACHOPIC_OFFSET:
10150 return legitimate_pic_address_disp_p (x);
10151 default:
10152 return false;
10154 /* FALLTHRU */
10156 case SYMBOL_REF:
10157 case LABEL_REF:
10158 return legitimate_pic_address_disp_p (x);
10160 default:
10161 return true;
10165 /* Determine if a given CONST RTX is a valid memory displacement
10166 in PIC mode. */
10168 bool
10169 legitimate_pic_address_disp_p (rtx disp)
10171 bool saw_plus;
10173 /* In 64bit mode we can allow direct addresses of symbols and labels
10174 when they are not dynamic symbols. */
10175 if (TARGET_64BIT)
10177 rtx op0 = disp, op1;
10179 switch (GET_CODE (disp))
10181 case LABEL_REF:
10182 return true;
10184 case CONST:
10185 if (GET_CODE (XEXP (disp, 0)) != PLUS)
10186 break;
10187 op0 = XEXP (XEXP (disp, 0), 0);
10188 op1 = XEXP (XEXP (disp, 0), 1);
10189 if (!CONST_INT_P (op1))
10190 break;
10191 if (GET_CODE (op0) == UNSPEC
10192 && (XINT (op0, 1) == UNSPEC_DTPOFF
10193 || XINT (op0, 1) == UNSPEC_NTPOFF)
10194 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
10195 return true;
10196 if (INTVAL (op1) >= 16*1024*1024
10197 || INTVAL (op1) < -16*1024*1024)
10198 break;
10199 if (GET_CODE (op0) == LABEL_REF)
10200 return true;
10201 if (GET_CODE (op0) == CONST
10202 && GET_CODE (XEXP (op0, 0)) == UNSPEC
10203 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
10204 return true;
10205 if (GET_CODE (op0) == UNSPEC
10206 && XINT (op0, 1) == UNSPEC_PCREL)
10207 return true;
10208 if (GET_CODE (op0) != SYMBOL_REF)
10209 break;
10210 /* FALLTHRU */
10212 case SYMBOL_REF:
10213 /* TLS references should always be enclosed in UNSPEC.
10214 A dllimported symbol always needs to be resolved. */
10215 if (SYMBOL_REF_TLS_MODEL (op0)
10216 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
10217 return false;
10219 if (TARGET_PECOFF)
10221 if (is_imported_p (op0))
10222 return true;
10224 if (SYMBOL_REF_FAR_ADDR_P (op0)
10225 || !SYMBOL_REF_LOCAL_P (op0))
10226 break;
10228 /* Function symbols need to be resolved only for the
10229 large model.
10230 For the small model we don't need to resolve anything
10231 here. */
10232 if ((ix86_cmodel != CM_LARGE_PIC
10233 && SYMBOL_REF_FUNCTION_P (op0))
10234 || ix86_cmodel == CM_SMALL_PIC)
10235 return true;
10236 /* Non-external symbols don't need to be resolved for
10237 the large and medium models. */
10238 if ((ix86_cmodel == CM_LARGE_PIC
10239 || ix86_cmodel == CM_MEDIUM_PIC)
10240 && !SYMBOL_REF_EXTERNAL_P (op0))
10241 return true;
10243 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
10244 && (SYMBOL_REF_LOCAL_P (op0)
10245 || (HAVE_LD_PIE_COPYRELOC
10246 && flag_pie
10247 && !SYMBOL_REF_WEAK (op0)
10248 && !SYMBOL_REF_FUNCTION_P (op0)))
10249 && ix86_cmodel != CM_LARGE_PIC)
10250 return true;
10251 break;
10253 default:
10254 break;
10257 if (GET_CODE (disp) != CONST)
10258 return false;
10259 disp = XEXP (disp, 0);
10261 if (TARGET_64BIT)
10263 /* It is not safe to allow PLUS expressions here; that would exceed the
10264 allowed distance of GOT tables. We should not need these anyway. */
10265 if (GET_CODE (disp) != UNSPEC
10266 || (XINT (disp, 1) != UNSPEC_GOTPCREL
10267 && XINT (disp, 1) != UNSPEC_GOTOFF
10268 && XINT (disp, 1) != UNSPEC_PCREL
10269 && XINT (disp, 1) != UNSPEC_PLTOFF))
10270 return false;
10272 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10273 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10274 return false;
10275 return true;
10278 saw_plus = false;
10279 if (GET_CODE (disp) == PLUS)
10281 if (!CONST_INT_P (XEXP (disp, 1)))
10282 return false;
10283 disp = XEXP (disp, 0);
10284 saw_plus = true;
10287 if (TARGET_MACHO && darwin_local_data_pic (disp))
10288 return true;
10290 if (GET_CODE (disp) != UNSPEC)
10291 return false;
10293 switch (XINT (disp, 1))
10295 case UNSPEC_GOT:
10296 if (saw_plus)
10297 return false;
10298 /* We need to check for both symbols and labels because VxWorks loads
10299 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10300 details. */
10301 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10302 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10303 case UNSPEC_GOTOFF:
10304 /* Refuse GOTOFF in 64bit mode since it is always 64 bits wide when used.
10305 While the ABI also specifies a 32bit relocation, we don't produce it in
10306 the small PIC model at all. */
10307 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10308 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10309 && !TARGET_64BIT)
10310 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10311 return false;
10312 case UNSPEC_GOTTPOFF:
10313 case UNSPEC_GOTNTPOFF:
10314 case UNSPEC_INDNTPOFF:
10315 if (saw_plus)
10316 return false;
10317 disp = XVECEXP (disp, 0, 0);
10318 return (GET_CODE (disp) == SYMBOL_REF
10319 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10320 case UNSPEC_NTPOFF:
10321 disp = XVECEXP (disp, 0, 0);
10322 return (GET_CODE (disp) == SYMBOL_REF
10323 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10324 case UNSPEC_DTPOFF:
10325 disp = XVECEXP (disp, 0, 0);
10326 return (GET_CODE (disp) == SYMBOL_REF
10327 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
10330 return false;
10333 /* Determine if OP is a suitable RTX for an address register.
10334 Return naked register if a register or a register subreg is
10335 found, otherwise return NULL_RTX. */
10337 static rtx
10338 ix86_validate_address_register (rtx op)
10340 machine_mode mode = GET_MODE (op);
10342 /* Only SImode or DImode registers can form the address. */
10343 if (mode != SImode && mode != DImode)
10344 return NULL_RTX;
10346 if (REG_P (op))
10347 return op;
10348 else if (SUBREG_P (op))
10350 rtx reg = SUBREG_REG (op);
10352 if (!REG_P (reg))
10353 return NULL_RTX;
10355 mode = GET_MODE (reg);
10357 /* Don't allow SUBREGs that span more than a word. It can
10358 lead to spill failures when the register is one word out
10359 of a two word structure. */
10360 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
10361 return NULL_RTX;
10363 /* Allow only SUBREGs of non-eliminable hard registers. */
10364 if (register_no_elim_operand (reg, mode))
10365 return reg;
10368 /* Op is not a register. */
10369 return NULL_RTX;
10372 /* Recognizes RTL expressions that are valid memory addresses for an
10373 instruction. The MODE argument is the machine mode for the MEM
10374 expression that wants to use this address.
10376 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
10377 convert common non-canonical forms to canonical form so that they will
10378 be recognized. */
10380 static bool
10381 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
10383 struct ix86_address parts;
10384 rtx base, index, disp;
10385 HOST_WIDE_INT scale;
10386 addr_space_t seg;
10388 if (ix86_decompose_address (addr, &parts) <= 0)
10389 /* Decomposition failed. */
10390 return false;
10392 base = parts.base;
10393 index = parts.index;
10394 disp = parts.disp;
10395 scale = parts.scale;
10396 seg = parts.seg;
10398 /* Validate base register. */
10399 if (base)
10401 rtx reg = ix86_validate_address_register (base);
10403 if (reg == NULL_RTX)
10404 return false;
10406 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
10407 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
10408 /* Base is not valid. */
10409 return false;
10412 /* Validate index register. */
10413 if (index)
10415 rtx reg = ix86_validate_address_register (index);
10417 if (reg == NULL_RTX)
10418 return false;
10420 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
10421 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
10422 /* Index is not valid. */
10423 return false;
10426 /* Index and base should have the same mode. */
10427 if (base && index
10428 && GET_MODE (base) != GET_MODE (index))
10429 return false;
10431 /* Address override works only on the (%reg) part of %fs:(%reg). */
10432 if (seg != ADDR_SPACE_GENERIC
10433 && ((base && GET_MODE (base) != word_mode)
10434 || (index && GET_MODE (index) != word_mode)))
10435 return false;
10437 /* Validate scale factor. */
10438 if (scale != 1)
10440 if (!index)
10441 /* Scale without index. */
10442 return false;
10444 if (scale != 2 && scale != 4 && scale != 8)
10445 /* Scale is not a valid multiplier. */
10446 return false;
10449 /* Validate displacement. */
10450 if (disp)
10452 if (GET_CODE (disp) == CONST
10453 && GET_CODE (XEXP (disp, 0)) == UNSPEC
10454 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
10455 switch (XINT (XEXP (disp, 0), 1))
10457 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64 bits
10458 wide when used. While the ABI also specifies 32bit relocations, we
10459 don't produce them at all and use IP-relative addressing instead.
10460 Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
10461 should be loaded via the GOT. */
10462 case UNSPEC_GOT:
10463 if (!TARGET_64BIT
10464 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
10465 goto is_legitimate_pic;
10466 /* FALLTHRU */
10467 case UNSPEC_GOTOFF:
10468 gcc_assert (flag_pic);
10469 if (!TARGET_64BIT)
10470 goto is_legitimate_pic;
10472 /* 64bit address unspec. */
10473 return false;
10475 case UNSPEC_GOTPCREL:
10476 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
10477 goto is_legitimate_pic;
10478 /* FALLTHRU */
10479 case UNSPEC_PCREL:
10480 gcc_assert (flag_pic);
10481 goto is_legitimate_pic;
10483 case UNSPEC_GOTTPOFF:
10484 case UNSPEC_GOTNTPOFF:
10485 case UNSPEC_INDNTPOFF:
10486 case UNSPEC_NTPOFF:
10487 case UNSPEC_DTPOFF:
10488 break;
10490 default:
10491 /* Invalid address unspec. */
10492 return false;
10495 else if (SYMBOLIC_CONST (disp)
10496 && (flag_pic
10497 || (TARGET_MACHO
10498 #if TARGET_MACHO
10499 && MACHOPIC_INDIRECT
10500 && !machopic_operand_p (disp)
10501 #endif
10505 is_legitimate_pic:
10506 if (TARGET_64BIT && (index || base))
10508 /* foo@dtpoff(%rX) is ok. */
10509 if (GET_CODE (disp) != CONST
10510 || GET_CODE (XEXP (disp, 0)) != PLUS
10511 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
10512 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
10513 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
10514 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
10515 /* Non-constant pic memory reference. */
10516 return false;
10518 else if ((!TARGET_MACHO || flag_pic)
10519 && ! legitimate_pic_address_disp_p (disp))
10520 /* Displacement is an invalid pic construct. */
10521 return false;
10522 #if TARGET_MACHO
10523 else if (MACHO_DYNAMIC_NO_PIC_P
10524 && !ix86_legitimate_constant_p (Pmode, disp))
10525 /* displacement must be referenced via non_lazy_pointer */
10526 return false;
10527 #endif
10529 /* This code used to verify that a symbolic pic displacement
10530 includes the pic_offset_table_rtx register.
10532 While this is a good idea, unfortunately these constructs may
10533 be created by the "adds using lea" optimization for incorrect
10534 code like:
10536 int a;
10537 int foo(int i)
10539 return *(&a+i);
10542 This code is nonsensical, but it results in addressing the
10543 GOT table with pic_offset_table_rtx as the base. We can't
10544 easily refuse it, since it gets matched by the
10545 "addsi3" pattern, which later gets split to lea when the
10546 output register differs from the input. While this
10547 could be handled by a separate addsi pattern for this case
10548 that never results in lea, disabling this test seems to be
10549 the easier and correct fix for the crash. */
10551 else if (GET_CODE (disp) != LABEL_REF
10552 && !CONST_INT_P (disp)
10553 && (GET_CODE (disp) != CONST
10554 || !ix86_legitimate_constant_p (Pmode, disp))
10555 && (GET_CODE (disp) != SYMBOL_REF
10556 || !ix86_legitimate_constant_p (Pmode, disp)))
10557 /* Displacement is not constant. */
10558 return false;
10559 else if (TARGET_64BIT
10560 && !x86_64_immediate_operand (disp, VOIDmode))
10561 /* Displacement is out of range. */
10562 return false;
10563 /* In x32 mode, constant addresses are sign-extended to 64 bits, so
10564 we have to prevent addresses from 0x80000000 to 0xffffffff. */
10565 else if (TARGET_X32 && !(index || base)
10566 && CONST_INT_P (disp)
10567 && val_signbit_known_set_p (SImode, INTVAL (disp)))
10568 return false;
10571 /* Everything looks valid. */
10572 return true;
10575 /* Determine if a given RTX is a valid constant address. */
10577 bool
10578 constant_address_p (rtx x)
10580 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
10583 /* Return a unique alias set for the GOT. */
10585 alias_set_type
10586 ix86_GOT_alias_set (void)
10588 static alias_set_type set = -1;
10589 if (set == -1)
10590 set = new_alias_set ();
10591 return set;
10594 /* Return a legitimate reference for ORIG (an address) using the
10595 register REG. If REG is 0, a new pseudo is generated.
10597 There are two types of references that must be handled:
10599 1. Global data references must load the address from the GOT, via
10600 the PIC reg. An insn is emitted to do this load, and the reg is
10601 returned.
10603 2. Static data references, constant pool addresses, and code labels
10604 compute the address as an offset from the GOT, whose base is in
10605 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
10606 differentiate them from global data objects. The returned
10607 address is the PIC reg + an unspec constant.
10609 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
10610 reg also appears in the address. */
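/* For illustration (32-bit PIC; not taken from this function): case 1
   becomes a GOT load such as "movl foo@GOT(%ebx), %eax", while case 2
   becomes an add or lea such as "leal bar@GOTOFF(%ebx), %eax".  */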
10613 legitimize_pic_address (rtx orig, rtx reg)
10615 rtx addr = orig;
10616 rtx new_rtx = orig;
10618 #if TARGET_MACHO
10619 if (TARGET_MACHO && !TARGET_64BIT)
10621 if (reg == 0)
10622 reg = gen_reg_rtx (Pmode);
10623 /* Use the generic Mach-O PIC machinery. */
10624 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
10626 #endif
10628 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10630 rtx tmp = legitimize_pe_coff_symbol (addr, true);
10631 if (tmp)
10632 return tmp;
10635 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
10636 new_rtx = addr;
10637 else if ((!TARGET_64BIT
10638 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
10639 && !TARGET_PECOFF
10640 && gotoff_operand (addr, Pmode))
10642 /* This symbol may be referenced via a displacement
10643 from the PIC base address (@GOTOFF). */
10644 if (GET_CODE (addr) == CONST)
10645 addr = XEXP (addr, 0);
10647 if (GET_CODE (addr) == PLUS)
10649 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10650 UNSPEC_GOTOFF);
10651 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10653 else
10654 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10656 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10658 if (TARGET_64BIT)
10659 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
10661 if (reg != 0)
10663 gcc_assert (REG_P (reg));
10664 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
10665 new_rtx, reg, 1, OPTAB_DIRECT);
10667 else
10668 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10670 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
10671 /* We can't use @GOTOFF for text labels
10672 on VxWorks, see gotoff_operand. */
10673 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
10675 rtx tmp = legitimize_pe_coff_symbol (addr, true);
10676 if (tmp)
10677 return tmp;
10679 /* For x64 PE-COFF there is no GOT table,
10680 so we use the address directly. */
10681 if (TARGET_64BIT && TARGET_PECOFF)
10683 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
10684 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10686 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
10688 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
10689 UNSPEC_GOTPCREL);
10690 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10691 new_rtx = gen_const_mem (Pmode, new_rtx);
10692 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10694 else
10696 /* This symbol must be referenced via a load
10697 from the Global Offset Table (@GOT). */
10698 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
10699 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10700 if (TARGET_64BIT)
10701 new_rtx = force_reg (Pmode, new_rtx);
10702 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10703 new_rtx = gen_const_mem (Pmode, new_rtx);
10704 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10707 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
10709 else
10711 if (CONST_INT_P (addr)
10712 && !x86_64_immediate_operand (addr, VOIDmode))
10713 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
10714 else if (GET_CODE (addr) == CONST)
10716 addr = XEXP (addr, 0);
10718 /* We must match stuff we generate before. Assume the only
10719 unspecs that can get here are ours. Not that we could do
10720 anything with them anyway.... */
10721 if (GET_CODE (addr) == UNSPEC
10722 || (GET_CODE (addr) == PLUS
10723 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
10724 return orig;
10725 gcc_assert (GET_CODE (addr) == PLUS);
10728 if (GET_CODE (addr) == PLUS)
10730 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
10732 /* Check first to see if this is a constant
10733 offset from a @GOTOFF symbol reference. */
10734 if (!TARGET_PECOFF
10735 && gotoff_operand (op0, Pmode)
10736 && CONST_INT_P (op1))
10738 if (!TARGET_64BIT)
10740 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
10741 UNSPEC_GOTOFF);
10742 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
10743 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10745 if (reg != 0)
10747 gcc_assert (REG_P (reg));
10748 new_rtx = expand_simple_binop (Pmode, PLUS,
10749 pic_offset_table_rtx,
10750 new_rtx, reg, 1,
10751 OPTAB_DIRECT);
10753 else
10754 new_rtx
10755 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10757 else
10759 if (INTVAL (op1) < -16*1024*1024
10760 || INTVAL (op1) >= 16*1024*1024)
10762 if (!x86_64_immediate_operand (op1, Pmode))
10763 op1 = force_reg (Pmode, op1);
10765 new_rtx
10766 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
10770 else
10772 rtx base = legitimize_pic_address (op0, reg);
10773 machine_mode mode = GET_MODE (base);
10774 new_rtx
10775 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
10777 if (CONST_INT_P (new_rtx))
10779 if (INTVAL (new_rtx) < -16*1024*1024
10780 || INTVAL (new_rtx) >= 16*1024*1024)
10782 if (!x86_64_immediate_operand (new_rtx, mode))
10783 new_rtx = force_reg (mode, new_rtx);
10785 new_rtx
10786 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
10788 else
10789 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
10791 else
10793 /* For %rip addressing, we have to use
10794 just disp32, with neither base nor index. */
10795 if (TARGET_64BIT
10796 && (GET_CODE (base) == SYMBOL_REF
10797 || GET_CODE (base) == LABEL_REF))
10798 base = force_reg (mode, base);
10799 if (GET_CODE (new_rtx) == PLUS
10800 && CONSTANT_P (XEXP (new_rtx, 1)))
10802 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
10803 new_rtx = XEXP (new_rtx, 1);
10805 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
10810 return new_rtx;
10813 /* Load the thread pointer. If TO_REG is true, force it into a register. */
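/* The UNSPEC_TP built below is rendered as a thread-pointer segment
   reference: %fs on 64-bit and %gs on 32-bit GNU/Linux; other targets
   may use a different segment.  */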
10815 static rtx
10816 get_thread_pointer (machine_mode tp_mode, bool to_reg)
10818 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
10820 if (GET_MODE (tp) != tp_mode)
10822 gcc_assert (GET_MODE (tp) == SImode);
10823 gcc_assert (tp_mode == DImode);
10825 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
10828 if (to_reg)
10829 tp = copy_to_mode_reg (tp_mode, tp);
10831 return tp;
10834 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10836 static GTY(()) rtx ix86_tls_symbol;
10838 static rtx
10839 ix86_tls_get_addr (void)
10841 if (!ix86_tls_symbol)
10843 const char *sym
10844 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
10845 ? "___tls_get_addr" : "__tls_get_addr");
10847 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
10850 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
10852 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
10853 UNSPEC_PLTOFF);
10854 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
10855 gen_rtx_CONST (Pmode, unspec));
10858 return ix86_tls_symbol;
10861 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
10863 static GTY(()) rtx ix86_tls_module_base_symbol;
10866 ix86_tls_module_base (void)
10868 if (!ix86_tls_module_base_symbol)
10870 ix86_tls_module_base_symbol
10871 = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
10873 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
10874 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
10877 return ix86_tls_module_base_symbol;
10880 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
10881 false if we expect this to be used for a memory address and true if
10882 we expect to load the address into a register. */
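/* For illustration (x86-64 GNU/Linux; a sketch, not lifted from this
   function): the initial-exec model below produces roughly
	movq	x@gottpoff(%rip), %rax
	movq	%fs:(%rax), %rdx
   and the local-exec model roughly
	movq	%fs:x@tpoff, %rdx
   while the global and local dynamic models go through __tls_get_addr
   (or TLS descriptors with -mtls-dialect=gnu2).  */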
10885 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
10887 rtx dest, base, off;
10888 rtx pic = NULL_RTX, tp = NULL_RTX;
10889 machine_mode tp_mode = Pmode;
10890 int type;
10892 /* Fall back to the global dynamic model if the toolchain cannot support
10893 local dynamic. */
10894 if (TARGET_SUN_TLS && !TARGET_64BIT
10895 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
10896 && model == TLS_MODEL_LOCAL_DYNAMIC)
10897 model = TLS_MODEL_GLOBAL_DYNAMIC;
10899 switch (model)
10901 case TLS_MODEL_GLOBAL_DYNAMIC:
10902 if (!TARGET_64BIT)
10904 if (flag_pic && !TARGET_PECOFF)
10905 pic = pic_offset_table_rtx;
10906 else
10908 pic = gen_reg_rtx (Pmode);
10909 emit_insn (gen_set_got (pic));
10913 if (TARGET_GNU2_TLS)
10915 dest = gen_reg_rtx (ptr_mode);
10916 if (TARGET_64BIT)
10917 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
10918 else
10919 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
10921 tp = get_thread_pointer (ptr_mode, true);
10922 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
10923 if (GET_MODE (dest) != Pmode)
10924 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
10925 dest = force_reg (Pmode, dest);
10927 if (GET_MODE (x) != Pmode)
10928 x = gen_rtx_ZERO_EXTEND (Pmode, x);
10930 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
10932 else
10934 rtx caddr = ix86_tls_get_addr ();
10936 dest = gen_reg_rtx (Pmode);
10937 if (TARGET_64BIT)
10939 rtx rax = gen_rtx_REG (Pmode, AX_REG);
10940 rtx_insn *insns;
10942 start_sequence ();
10943 emit_call_insn
10944 (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
10945 insns = get_insns ();
10946 end_sequence ();
10948 if (GET_MODE (x) != Pmode)
10949 x = gen_rtx_ZERO_EXTEND (Pmode, x);
10951 RTL_CONST_CALL_P (insns) = 1;
10952 emit_libcall_block (insns, dest, rax, x);
10954 else
10955 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
10957 break;
10959 case TLS_MODEL_LOCAL_DYNAMIC:
10960 if (!TARGET_64BIT)
10962 if (flag_pic)
10963 pic = pic_offset_table_rtx;
10964 else
10966 pic = gen_reg_rtx (Pmode);
10967 emit_insn (gen_set_got (pic));
10971 if (TARGET_GNU2_TLS)
10973 rtx tmp = ix86_tls_module_base ();
10975 base = gen_reg_rtx (ptr_mode);
10976 if (TARGET_64BIT)
10977 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
10978 else
10979 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
10981 tp = get_thread_pointer (ptr_mode, true);
10982 if (GET_MODE (base) != Pmode)
10983 base = gen_rtx_ZERO_EXTEND (Pmode, base);
10984 base = force_reg (Pmode, base);
10986 else
10988 rtx caddr = ix86_tls_get_addr ();
10990 base = gen_reg_rtx (Pmode);
10991 if (TARGET_64BIT)
10993 rtx rax = gen_rtx_REG (Pmode, AX_REG);
10994 rtx_insn *insns;
10995 rtx eqv;
10997 start_sequence ();
10998 emit_call_insn
10999 (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
11000 insns = get_insns ();
11001 end_sequence ();
11003 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
11004 share the LD_BASE result with other LD model accesses. */
11005 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11006 UNSPEC_TLS_LD_BASE);
11008 RTL_CONST_CALL_P (insns) = 1;
11009 emit_libcall_block (insns, base, rax, eqv);
11011 else
11012 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
11015 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11016 off = gen_rtx_CONST (Pmode, off);
11018 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11020 if (TARGET_GNU2_TLS)
11022 if (GET_MODE (tp) != Pmode)
11024 dest = lowpart_subreg (ptr_mode, dest, Pmode);
11025 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
11026 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
11028 else
11029 dest = gen_rtx_PLUS (Pmode, tp, dest);
11030 dest = force_reg (Pmode, dest);
11032 if (GET_MODE (x) != Pmode)
11033 x = gen_rtx_ZERO_EXTEND (Pmode, x);
11035 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
11037 break;
11039 case TLS_MODEL_INITIAL_EXEC:
11040 if (TARGET_64BIT)
11042 if (TARGET_SUN_TLS && !TARGET_X32)
11044 /* The Sun linker took the AMD64 TLS spec literally
11045 and can only handle %rax as the destination of the
11046 initial-exec code sequence. */
11048 dest = gen_reg_rtx (DImode);
11049 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
11050 return dest;
11053 /* Generate DImode references to avoid %fs:(%reg32)
11054 problems and the linker IE->LE relaxation bug. */
11055 tp_mode = DImode;
11056 pic = NULL;
11057 type = UNSPEC_GOTNTPOFF;
11059 else if (flag_pic)
11061 pic = pic_offset_table_rtx;
11062 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11064 else if (!TARGET_ANY_GNU_TLS)
11066 pic = gen_reg_rtx (Pmode);
11067 emit_insn (gen_set_got (pic));
11068 type = UNSPEC_GOTTPOFF;
11070 else
11072 pic = NULL;
11073 type = UNSPEC_INDNTPOFF;
11076 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
11077 off = gen_rtx_CONST (tp_mode, off);
11078 if (pic)
11079 off = gen_rtx_PLUS (tp_mode, pic, off);
11080 off = gen_const_mem (tp_mode, off);
11081 set_mem_alias_set (off, ix86_GOT_alias_set ());
11083 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11085 base = get_thread_pointer (tp_mode,
11086 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11087 off = force_reg (tp_mode, off);
11088 dest = gen_rtx_PLUS (tp_mode, base, off);
11089 if (tp_mode != Pmode)
11090 dest = convert_to_mode (Pmode, dest, 1);
11092 else
11094 base = get_thread_pointer (Pmode, true);
11095 dest = gen_reg_rtx (Pmode);
11096 emit_insn (gen_sub3_insn (dest, base, off));
11098 break;
11100 case TLS_MODEL_LOCAL_EXEC:
11101 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11102 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11103 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11104 off = gen_rtx_CONST (Pmode, off);
11106 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11108 base = get_thread_pointer (Pmode,
11109 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11110 return gen_rtx_PLUS (Pmode, base, off);
11112 else
11114 base = get_thread_pointer (Pmode, true);
11115 dest = gen_reg_rtx (Pmode);
11116 emit_insn (gen_sub3_insn (dest, base, off));
11118 break;
11120 default:
11121 gcc_unreachable ();
11124 return dest;
11127 /* Return true if OP refers to a TLS address. */
11128 bool
11129 ix86_tls_address_pattern_p (rtx op)
11131 subrtx_var_iterator::array_type array;
11132 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
11134 rtx op = *iter;
11135 if (MEM_P (op))
11137 rtx *x = &XEXP (op, 0);
11138 while (GET_CODE (*x) == PLUS)
11140 int i;
11141 for (i = 0; i < 2; i++)
11143 rtx u = XEXP (*x, i);
11144 if (GET_CODE (u) == ZERO_EXTEND)
11145 u = XEXP (u, 0);
11146 if (GET_CODE (u) == UNSPEC
11147 && XINT (u, 1) == UNSPEC_TP)
11148 return true;
11150 x = &XEXP (*x, 0);
11153 iter.skip_subrtxes ();
11157 return false;
11160 /* Rewrite *LOC so that it refers to the default TLS address space. */
11161 void
11162 ix86_rewrite_tls_address_1 (rtx *loc)
11164 subrtx_ptr_iterator::array_type array;
11165 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
11167 rtx *loc = *iter;
11168 if (MEM_P (*loc))
11170 rtx addr = XEXP (*loc, 0);
11171 rtx *x = &addr;
11172 while (GET_CODE (*x) == PLUS)
11174 int i;
11175 for (i = 0; i < 2; i++)
11177 rtx u = XEXP (*x, i);
11178 if (GET_CODE (u) == ZERO_EXTEND)
11179 u = XEXP (u, 0);
11180 if (GET_CODE (u) == UNSPEC
11181 && XINT (u, 1) == UNSPEC_TP)
11183 addr_space_t as = DEFAULT_TLS_SEG_REG;
11185 *x = XEXP (*x, 1 - i);
11187 *loc = replace_equiv_address_nv (*loc, addr, true);
11188 set_mem_addr_space (*loc, as);
11189 return;
11192 x = &XEXP (*x, 0);
11195 iter.skip_subrtxes ();
11200 /* Rewrite an instruction pattern involving a TLS address
11201 so that it refers to the default TLS address space. */
11203 ix86_rewrite_tls_address (rtx pattern)
11205 pattern = copy_insn (pattern);
11206 ix86_rewrite_tls_address_1 (&pattern);
11207 return pattern;
11210 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11211 to symbol DECL if BEIMPORT is true. Otherwise create or return the
11212 unique refptr-DECL symbol corresponding to symbol DECL. */
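/* For example, a dllimported function foo is reached through the
   loader-provided pointer __imp_foo (or __imp__foo when a user label
   prefix is in use), while the refptr variant creates a .refptr.foo
   slot instead.  */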
11214 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
11216 static inline hashval_t hash (tree_map *m) { return m->hash; }
11217 static inline bool
11218 equal (tree_map *a, tree_map *b)
11220 return a->base.from == b->base.from;
11223 static int
11224 keep_cache_entry (tree_map *&m)
11226 return ggc_marked_p (m->base.from);
11230 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
11232 static tree
11233 get_dllimport_decl (tree decl, bool beimport)
11235 struct tree_map *h, in;
11236 const char *name;
11237 const char *prefix;
11238 size_t namelen, prefixlen;
11239 char *imp_name;
11240 tree to;
11241 rtx rtl;
11243 if (!dllimport_map)
11244 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
11246 in.hash = htab_hash_pointer (decl);
11247 in.base.from = decl;
11248 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
11249 h = *loc;
11250 if (h)
11251 return h->to;
11253 *loc = h = ggc_alloc<tree_map> ();
11254 h->hash = in.hash;
11255 h->base.from = decl;
11256 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11257 VAR_DECL, NULL, ptr_type_node);
11258 DECL_ARTIFICIAL (to) = 1;
11259 DECL_IGNORED_P (to) = 1;
11260 DECL_EXTERNAL (to) = 1;
11261 TREE_READONLY (to) = 1;
11263 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11264 name = targetm.strip_name_encoding (name);
11265 if (beimport)
11266 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11267 ? "*__imp_" : "*__imp__";
11268 else
11269 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
11270 namelen = strlen (name);
11271 prefixlen = strlen (prefix);
11272 imp_name = (char *) alloca (namelen + prefixlen + 1);
11273 memcpy (imp_name, prefix, prefixlen);
11274 memcpy (imp_name + prefixlen, name, namelen + 1);
11276 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11277 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11278 SET_SYMBOL_REF_DECL (rtl, to);
11279 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
11280 if (!beimport)
11282 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
11283 #ifdef SUB_TARGET_RECORD_STUB
11284 SUB_TARGET_RECORD_STUB (name);
11285 #endif
11288 rtl = gen_const_mem (Pmode, rtl);
11289 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11291 SET_DECL_RTL (to, rtl);
11292 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11294 return to;
11297 /* Expand SYMBOL into its corresponding far-address symbol.
11298 WANT_REG is true if we require the result be a register. */
11300 static rtx
11301 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
11303 tree imp_decl;
11304 rtx x;
11306 gcc_assert (SYMBOL_REF_DECL (symbol));
11307 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
11309 x = DECL_RTL (imp_decl);
11310 if (want_reg)
11311 x = force_reg (Pmode, x);
11312 return x;
11315 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11316 true if we require the result be a register. */
11318 static rtx
11319 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11321 tree imp_decl;
11322 rtx x;
11324 gcc_assert (SYMBOL_REF_DECL (symbol));
11325 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
11327 x = DECL_RTL (imp_decl);
11328 if (want_reg)
11329 x = force_reg (Pmode, x);
11330 return x;
11333 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
11334 is true if we require the result be a register. */
11337 legitimize_pe_coff_symbol (rtx addr, bool inreg)
11339 if (!TARGET_PECOFF)
11340 return NULL_RTX;
11342 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11344 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11345 return legitimize_dllimport_symbol (addr, inreg);
11346 if (GET_CODE (addr) == CONST
11347 && GET_CODE (XEXP (addr, 0)) == PLUS
11348 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11349 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11351 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
11352 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11356 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
11357 return NULL_RTX;
11358 if (GET_CODE (addr) == SYMBOL_REF
11359 && !is_imported_p (addr)
11360 && SYMBOL_REF_EXTERNAL_P (addr)
11361 && SYMBOL_REF_DECL (addr))
11362 return legitimize_pe_coff_extern_decl (addr, inreg);
11364 if (GET_CODE (addr) == CONST
11365 && GET_CODE (XEXP (addr, 0)) == PLUS
11366 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11367 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
11368 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
11369 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
11371 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
11372 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11374 return NULL_RTX;
11377 /* Try machine-dependent ways of modifying an illegitimate address
11378 to be legitimate. If we find one, return the new, valid address.
11379 This macro is used in only one place: `memory_address' in explow.c.
11381 OLDX is the address as it was before break_out_memory_refs was called.
11382 In some cases it is useful to look at this to decide what needs to be done.
11384 It is always safe for this macro to do nothing. It exists to recognize
11385 opportunities to optimize the output.
11387 For the 80386, we handle X+REG by loading X into a register R and
11388 using R+REG. R will go in a general reg and indexing will be used.
11389 However, if REG is a broken-out memory address or multiplication,
11390 nothing needs to be done because REG can certainly go in a general reg.
11392 When -fpic is used, special handling is needed for symbolic references.
11393 See comments by legitimize_pic_address in i386.c for details. */
11395 static rtx
11396 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
11398 bool changed = false;
11399 unsigned log;
11401 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11402 if (log)
11403 return legitimize_tls_address (x, (enum tls_model) log, false);
11404 if (GET_CODE (x) == CONST
11405 && GET_CODE (XEXP (x, 0)) == PLUS
11406 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11407 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11409 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11410 (enum tls_model) log, false);
11411 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11414 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11416 rtx tmp = legitimize_pe_coff_symbol (x, true);
11417 if (tmp)
11418 return tmp;
11421 if (flag_pic && SYMBOLIC_CONST (x))
11422 return legitimize_pic_address (x, 0);
11424 #if TARGET_MACHO
11425 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
11426 return machopic_indirect_data_reference (x, 0);
11427 #endif
11429 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
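/* For illustration: (plus (ashift (reg A) (const_int 2)) (reg B)) is
   rewritten into (plus (mult (reg A) (const_int 4)) (reg B)), which
   matches the scaled-index form of an x86 address.  */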
11430 if (GET_CODE (x) == ASHIFT
11431 && CONST_INT_P (XEXP (x, 1))
11432 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11434 changed = true;
11435 log = INTVAL (XEXP (x, 1));
11436 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11437 GEN_INT (1 << log));
11440 if (GET_CODE (x) == PLUS)
11442 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11444 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11445 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11446 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11448 changed = true;
11449 log = INTVAL (XEXP (XEXP (x, 0), 1));
11450 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11451 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11452 GEN_INT (1 << log));
11455 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11456 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11457 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11459 changed = true;
11460 log = INTVAL (XEXP (XEXP (x, 1), 1));
11461 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11462 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11463 GEN_INT (1 << log));
11466 /* Put multiply first if it isn't already. */
11467 if (GET_CODE (XEXP (x, 1)) == MULT)
11469 std::swap (XEXP (x, 0), XEXP (x, 1));
11470 changed = true;
11473 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11474 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11475 created by virtual register instantiation, register elimination, and
11476 similar optimizations. */
11477 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
11479 changed = true;
11480 x = gen_rtx_PLUS (Pmode,
11481 gen_rtx_PLUS (Pmode, XEXP (x, 0),
11482 XEXP (XEXP (x, 1), 0)),
11483 XEXP (XEXP (x, 1), 1));
11486 /* Canonicalize
11487 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11488 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
11489 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
11490 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11491 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
11492 && CONSTANT_P (XEXP (x, 1)))
11494 rtx constant;
11495 rtx other = NULL_RTX;
11497 if (CONST_INT_P (XEXP (x, 1)))
11499 constant = XEXP (x, 1);
11500 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
11502 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
11504 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
11505 other = XEXP (x, 1);
11507 else
11508 constant = 0;
11510 if (constant)
11512 changed = true;
11513 x = gen_rtx_PLUS (Pmode,
11514 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
11515 XEXP (XEXP (XEXP (x, 0), 1), 0)),
11516 plus_constant (Pmode, other,
11517 INTVAL (constant)));
11521 if (changed && ix86_legitimate_address_p (mode, x, false))
11522 return x;
11524 if (GET_CODE (XEXP (x, 0)) == MULT)
11526 changed = true;
11527 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
11530 if (GET_CODE (XEXP (x, 1)) == MULT)
11532 changed = true;
11533 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
11536 if (changed
11537 && REG_P (XEXP (x, 1))
11538 && REG_P (XEXP (x, 0)))
11539 return x;
11541 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
11543 changed = true;
11544 x = legitimize_pic_address (x, 0);
11547 if (changed && ix86_legitimate_address_p (mode, x, false))
11548 return x;
11550 if (REG_P (XEXP (x, 0)))
11552 rtx temp = gen_reg_rtx (Pmode);
11553 rtx val = force_operand (XEXP (x, 1), temp);
11554 if (val != temp)
11556 val = convert_to_mode (Pmode, val, 1);
11557 emit_move_insn (temp, val);
11560 XEXP (x, 1) = temp;
11561 return x;
11564 else if (REG_P (XEXP (x, 1)))
11566 rtx temp = gen_reg_rtx (Pmode);
11567 rtx val = force_operand (XEXP (x, 0), temp);
11568 if (val != temp)
11570 val = convert_to_mode (Pmode, val, 1);
11571 emit_move_insn (temp, val);
11574 XEXP (x, 0) = temp;
11575 return x;
11579 return x;
11582 /* Print an integer constant expression in assembler syntax. Addition
11583 and subtraction are the only arithmetic that may appear in these
11584 expressions. FILE is the stdio stream to write to, X is the rtx, and
11585 CODE is the operand print code from the output string. */
11587 static void
11588 output_pic_addr_const (FILE *file, rtx x, int code)
11590 char buf[256];
11592 switch (GET_CODE (x))
11594 case PC:
11595 gcc_assert (flag_pic);
11596 putc ('.', file);
11597 break;
11599 case SYMBOL_REF:
11600 if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
11601 output_addr_const (file, x);
11602 else
11604 const char *name = XSTR (x, 0);
11606 /* Mark the decl as referenced so that cgraph will
11607 output the function. */
11608 if (SYMBOL_REF_DECL (x))
11609 mark_decl_referenced (SYMBOL_REF_DECL (x));
11611 #if TARGET_MACHO
11612 if (MACHOPIC_INDIRECT
11613 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
11614 name = machopic_indirection_name (x, /*stub_p=*/true);
11615 #endif
11616 assemble_name (file, name);
11618 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
11619 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
11620 fputs ("@PLT", file);
11621 break;
11623 case LABEL_REF:
11624 x = XEXP (x, 0);
11625 /* FALLTHRU */
11626 case CODE_LABEL:
11627 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
11628 assemble_name (asm_out_file, buf);
11629 break;
11631 case CONST_INT:
11632 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11633 break;
11635 case CONST:
11636 /* This used to output parentheses around the expression,
11637 but that does not work on the 386 (either ATT or BSD assembler). */
11638 output_pic_addr_const (file, XEXP (x, 0), code);
11639 break;
11641 case CONST_DOUBLE:
11642 /* We can't handle floating point constants;
11643 TARGET_PRINT_OPERAND must handle them. */
11644 output_operand_lossage ("floating constant misused");
11645 break;
11647 case PLUS:
11648 /* Some assemblers need integer constants to appear first. */
11649 if (CONST_INT_P (XEXP (x, 0)))
11651 output_pic_addr_const (file, XEXP (x, 0), code);
11652 putc ('+', file);
11653 output_pic_addr_const (file, XEXP (x, 1), code);
11655 else
11657 gcc_assert (CONST_INT_P (XEXP (x, 1)));
11658 output_pic_addr_const (file, XEXP (x, 1), code);
11659 putc ('+', file);
11660 output_pic_addr_const (file, XEXP (x, 0), code);
11662 break;
11664 case MINUS:
11665 if (!TARGET_MACHO)
11666 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
11667 output_pic_addr_const (file, XEXP (x, 0), code);
11668 putc ('-', file);
11669 output_pic_addr_const (file, XEXP (x, 1), code);
11670 if (!TARGET_MACHO)
11671 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
11672 break;
11674 case UNSPEC:
11675 gcc_assert (XVECLEN (x, 0) == 1);
11676 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
11677 switch (XINT (x, 1))
11679 case UNSPEC_GOT:
11680 fputs ("@GOT", file);
11681 break;
11682 case UNSPEC_GOTOFF:
11683 fputs ("@GOTOFF", file);
11684 break;
11685 case UNSPEC_PLTOFF:
11686 fputs ("@PLTOFF", file);
11687 break;
11688 case UNSPEC_PCREL:
11689 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11690 "(%rip)" : "[rip]", file);
11691 break;
11692 case UNSPEC_GOTPCREL:
11693 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11694 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
11695 break;
11696 case UNSPEC_GOTTPOFF:
11697 /* FIXME: This might be @TPOFF in Sun ld too. */
11698 fputs ("@gottpoff", file);
11699 break;
11700 case UNSPEC_TPOFF:
11701 fputs ("@tpoff", file);
11702 break;
11703 case UNSPEC_NTPOFF:
11704 if (TARGET_64BIT)
11705 fputs ("@tpoff", file);
11706 else
11707 fputs ("@ntpoff", file);
11708 break;
11709 case UNSPEC_DTPOFF:
11710 fputs ("@dtpoff", file);
11711 break;
11712 case UNSPEC_GOTNTPOFF:
11713 if (TARGET_64BIT)
11714 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11715 "@gottpoff(%rip)": "@gottpoff[rip]", file);
11716 else
11717 fputs ("@gotntpoff", file);
11718 break;
11719 case UNSPEC_INDNTPOFF:
11720 fputs ("@indntpoff", file);
11721 break;
11722 #if TARGET_MACHO
11723 case UNSPEC_MACHOPIC_OFFSET:
11724 putc ('-', file);
11725 machopic_output_function_base_name (file);
11726 break;
11727 #endif
11728 default:
11729 output_operand_lossage ("invalid UNSPEC as operand");
11730 break;
11732 break;
11734 default:
11735 output_operand_lossage ("invalid expression as operand");
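/* As a rough illustration of the output above: a local symbol accessed
   through the 32-bit PIC register prints as "foo@GOTOFF" via the
   UNSPEC_GOTOFF case, while a call to a non-local function printed
   with the 'P' operand code comes out as "foo@PLT".  */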
11739 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11740 We need to emit DTP-relative relocations. */
11742 static void ATTRIBUTE_UNUSED
11743 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
11745 fputs (ASM_LONG, file);
11746 output_addr_const (file, x);
11747 fputs ("@dtpoff", file);
11748 switch (size)
11750 case 4:
11751 break;
11752 case 8:
11753 fputs (", 0", file);
11754 break;
11755 default:
11756 gcc_unreachable ();
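/* For example, with SIZE == 4 this emits roughly

     .long foo@dtpoff

   and with SIZE == 8 the same directive followed by ", 0", padding
   the 8-byte slot with a zero upper half.  */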
11760 /* Return true if X is a representation of the PIC register. This copes
11761 with calls from ix86_find_base_term, where the register might have
11762 been replaced by a cselib value. */
11764 static bool
11765 ix86_pic_register_p (rtx x)
11767 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
11768 return (pic_offset_table_rtx
11769 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
11770 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
11771 return true;
11772 else if (!REG_P (x))
11773 return false;
11774 else if (pic_offset_table_rtx)
11776 if (REGNO (x) == REGNO (pic_offset_table_rtx))
11777 return true;
11778 if (HARD_REGISTER_P (x)
11779 && !HARD_REGISTER_P (pic_offset_table_rtx)
11780 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
11781 return true;
11782 return false;
11784 else
11785 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
11788 /* Helper function for ix86_delegitimize_address.
11789 Attempt to delegitimize TLS local-exec accesses. */
11791 static rtx
11792 ix86_delegitimize_tls_address (rtx orig_x)
11794 rtx x = orig_x, unspec;
11795 struct ix86_address addr;
11797 if (!TARGET_TLS_DIRECT_SEG_REFS)
11798 return orig_x;
11799 if (MEM_P (x))
11800 x = XEXP (x, 0);
11801 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
11802 return orig_x;
11803 if (ix86_decompose_address (x, &addr) == 0
11804 || addr.seg != DEFAULT_TLS_SEG_REG
11805 || addr.disp == NULL_RTX
11806 || GET_CODE (addr.disp) != CONST)
11807 return orig_x;
11808 unspec = XEXP (addr.disp, 0);
11809 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
11810 unspec = XEXP (unspec, 0);
11811 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
11812 return orig_x;
11813 x = XVECEXP (unspec, 0, 0);
11814 gcc_assert (GET_CODE (x) == SYMBOL_REF);
11815 if (unspec != XEXP (addr.disp, 0))
11816 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
11817 if (addr.index)
11819 rtx idx = addr.index;
11820 if (addr.scale != 1)
11821 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
11822 x = gen_rtx_PLUS (Pmode, idx, x);
11824 if (addr.base)
11825 x = gen_rtx_PLUS (Pmode, addr.base, x);
11826 if (MEM_P (orig_x))
11827 x = replace_equiv_address_nv (orig_x, x);
11828 return x;
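/* As a sketch, assuming a 32-bit TLS local-exec access such as

     movl %gs:foo@ntpoff(,%eax,4), %edx

   the displacement carries an UNSPEC_NTPOFF; the code above recovers
   an address of the form (plus (mult %eax 4) foo), so later passes
   and debug output see the underlying symbol again.  */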
11831 /* In the name of slightly smaller debug output, and to cater to
11832 general assembler lossage, recognize PIC+GOTOFF and turn it back
11833 into a direct symbol reference.
11835 On Darwin, this is necessary to avoid a crash, because Darwin
11836 has a different PIC label for each routine but the DWARF debugging
11837 information is not associated with any particular routine, so it's
11838 necessary to remove references to the PIC label from RTL stored by
11839 the DWARF output code.
11841 This helper is used in the normal ix86_delegitimize_address
11842 entrypoint (e.g. used in the target delegitimization hook) and
11843 in ix86_find_base_term. As compile time memory optimization, we
11844 avoid allocating rtxes that will not change anything on the outcome
11845 of the callers (find_base_value and find_base_term). */
11847 static inline rtx
11848 ix86_delegitimize_address_1 (rtx x, bool base_term_p)
11850 rtx orig_x = delegitimize_mem_from_attrs (x);
11851 /* addend is NULL or some rtx if x is something+GOTOFF where
11852 something doesn't include the PIC register. */
11853 rtx addend = NULL_RTX;
11854 /* reg_addend is NULL or a multiple of some register. */
11855 rtx reg_addend = NULL_RTX;
11856 /* const_addend is NULL or a const_int. */
11857 rtx const_addend = NULL_RTX;
11858 /* This is the result, or NULL. */
11859 rtx result = NULL_RTX;
11861 x = orig_x;
11863 if (MEM_P (x))
11864 x = XEXP (x, 0);
11866 if (TARGET_64BIT)
11868 if (GET_CODE (x) == CONST
11869 && GET_CODE (XEXP (x, 0)) == PLUS
11870 && GET_MODE (XEXP (x, 0)) == Pmode
11871 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11872 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
11873 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
11875 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
11876 base. A CONST can't be arg_pointer_rtx based. */
11877 if (base_term_p && MEM_P (orig_x))
11878 return orig_x;
11879 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
11880 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
11881 if (MEM_P (orig_x))
11882 x = replace_equiv_address_nv (orig_x, x);
11883 return x;
11886 if (GET_CODE (x) == CONST
11887 && GET_CODE (XEXP (x, 0)) == UNSPEC
11888 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
11889 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
11890 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
11892 x = XVECEXP (XEXP (x, 0), 0, 0);
11893 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
11895 x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
11896 if (x == NULL_RTX)
11897 return orig_x;
11899 return x;
11902 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
11903 return ix86_delegitimize_tls_address (orig_x);
11905 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
11906 and -mcmodel=medium -fpic. */
11909 if (GET_CODE (x) != PLUS
11910 || GET_CODE (XEXP (x, 1)) != CONST)
11911 return ix86_delegitimize_tls_address (orig_x);
11913 if (ix86_pic_register_p (XEXP (x, 0)))
11914 /* %ebx + GOT/GOTOFF */
11916 else if (GET_CODE (XEXP (x, 0)) == PLUS)
11918 /* %ebx + %reg * scale + GOT/GOTOFF */
11919 reg_addend = XEXP (x, 0);
11920 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
11921 reg_addend = XEXP (reg_addend, 1);
11922 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
11923 reg_addend = XEXP (reg_addend, 0);
11924 else
11926 reg_addend = NULL_RTX;
11927 addend = XEXP (x, 0);
11930 else
11931 addend = XEXP (x, 0);
11933 x = XEXP (XEXP (x, 1), 0);
11934 if (GET_CODE (x) == PLUS
11935 && CONST_INT_P (XEXP (x, 1)))
11937 const_addend = XEXP (x, 1);
11938 x = XEXP (x, 0);
11941 if (GET_CODE (x) == UNSPEC
11942 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
11943 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
11944 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
11945 && !MEM_P (orig_x) && !addend)))
11946 result = XVECEXP (x, 0, 0);
11948 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
11949 && !MEM_P (orig_x))
11950 result = XVECEXP (x, 0, 0);
11952 if (! result)
11953 return ix86_delegitimize_tls_address (orig_x);
11955 /* For (PLUS something CONST_INT) both find_base_{value,term} just
11956 recurse on the first operand. */
11957 if (const_addend && !base_term_p)
11958 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
11959 if (reg_addend)
11960 result = gen_rtx_PLUS (Pmode, reg_addend, result);
11961 if (addend)
11963 /* If the rest of original X doesn't involve the PIC register, add
11964 addend and subtract pic_offset_table_rtx. This can happen e.g.
11965 for code like:
11966 leal (%ebx, %ecx, 4), %ecx
11968 movl foo@GOTOFF(%ecx), %edx
11969 in which case we return (%ecx - %ebx) + foo
11970 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
11971 and reload has completed. Don't do the latter for debug,
11972 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
11973 if (pic_offset_table_rtx
11974 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
11975 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
11976 pic_offset_table_rtx),
11977 result);
11978 else if (base_term_p
11979 && pic_offset_table_rtx
11980 && !TARGET_MACHO
11981 && !TARGET_VXWORKS_RTP)
11983 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
11984 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
11985 result = gen_rtx_PLUS (Pmode, tmp, result);
11987 else
11988 return orig_x;
11990 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
11992 result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
11993 if (result == NULL_RTX)
11994 return orig_x;
11996 return result;
11999 /* The normal instantiation of the above template. */
12001 static rtx
12002 ix86_delegitimize_address (rtx x)
12004 return ix86_delegitimize_address_1 (x, false);
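/* As an illustration, the 32-bit PIC address computation

     leal foo@GOTOFF(%ebx), %eax

   is represented as (plus %ebx (const (unspec [foo] UNSPEC_GOTOFF)));
   delegitimizing it strips the UNSPEC and the PIC register so that
   debug info can refer to the symbol foo directly.  */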
12007 /* If X is a machine specific address (i.e. a symbol or label being
12008 referenced as a displacement from the GOT implemented using an
12009 UNSPEC), then return the base term. Otherwise return X. */
12012 ix86_find_base_term (rtx x)
12014 rtx term;
12016 if (TARGET_64BIT)
12018 if (GET_CODE (x) != CONST)
12019 return x;
12020 term = XEXP (x, 0);
12021 if (GET_CODE (term) == PLUS
12022 && CONST_INT_P (XEXP (term, 1)))
12023 term = XEXP (term, 0);
12024 if (GET_CODE (term) != UNSPEC
12025 || (XINT (term, 1) != UNSPEC_GOTPCREL
12026 && XINT (term, 1) != UNSPEC_PCREL))
12027 return x;
12029 return XVECEXP (term, 0, 0);
12032 return ix86_delegitimize_address_1 (x, true);
12035 /* Return true if X shouldn't be emitted into the debug info.
12036 Disallow UNSPECs other than @gotoff - we can't easily emit the
12037 _GLOBAL_OFFSET_TABLE_ symbol into the .debug_info section, so we
12038 should not delegitimize it, but instead assemble it as @gotoff.
12039 Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
12040 assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */
12042 static bool
12043 ix86_const_not_ok_for_debug_p (rtx x)
12045 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
12046 return true;
12048 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
12049 return true;
12051 return false;
12054 static void
12055 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
12056 bool fp, FILE *file)
12058 const char *suffix;
12060 if (mode == CCFPmode)
12062 code = ix86_fp_compare_code_to_integer (code);
12063 mode = CCmode;
12065 if (reverse)
12066 code = reverse_condition (code);
12068 switch (code)
12070 case EQ:
12071 gcc_assert (mode != CCGZmode);
12072 switch (mode)
12074 case E_CCAmode:
12075 suffix = "a";
12076 break;
12077 case E_CCCmode:
12078 suffix = "c";
12079 break;
12080 case E_CCOmode:
12081 suffix = "o";
12082 break;
12083 case E_CCPmode:
12084 suffix = "p";
12085 break;
12086 case E_CCSmode:
12087 suffix = "s";
12088 break;
12089 default:
12090 suffix = "e";
12091 break;
12093 break;
12094 case NE:
12095 gcc_assert (mode != CCGZmode);
12096 switch (mode)
12098 case E_CCAmode:
12099 suffix = "na";
12100 break;
12101 case E_CCCmode:
12102 suffix = "nc";
12103 break;
12104 case E_CCOmode:
12105 suffix = "no";
12106 break;
12107 case E_CCPmode:
12108 suffix = "np";
12109 break;
12110 case E_CCSmode:
12111 suffix = "ns";
12112 break;
12113 default:
12114 suffix = "ne";
12115 break;
12117 break;
12118 case GT:
12119 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12120 suffix = "g";
12121 break;
12122 case GTU:
12123 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12124 Those same assemblers have the same but opposite lossage on cmov. */
12125 if (mode == CCmode)
12126 suffix = fp ? "nbe" : "a";
12127 else
12128 gcc_unreachable ();
12129 break;
12130 case LT:
12131 switch (mode)
12133 case E_CCNOmode:
12134 case E_CCGOCmode:
12135 suffix = "s";
12136 break;
12138 case E_CCmode:
12139 case E_CCGCmode:
12140 case E_CCGZmode:
12141 suffix = "l";
12142 break;
12144 default:
12145 gcc_unreachable ();
12147 break;
12148 case LTU:
12149 if (mode == CCmode || mode == CCGZmode)
12150 suffix = "b";
12151 else if (mode == CCCmode)
12152 suffix = fp ? "b" : "c";
12153 else
12154 gcc_unreachable ();
12155 break;
12156 case GE:
12157 switch (mode)
12159 case E_CCNOmode:
12160 case E_CCGOCmode:
12161 suffix = "ns";
12162 break;
12164 case E_CCmode:
12165 case E_CCGCmode:
12166 case E_CCGZmode:
12167 suffix = "ge";
12168 break;
12170 default:
12171 gcc_unreachable ();
12173 break;
12174 case GEU:
12175 if (mode == CCmode || mode == CCGZmode)
12176 suffix = "nb";
12177 else if (mode == CCCmode)
12178 suffix = fp ? "nb" : "nc";
12179 else
12180 gcc_unreachable ();
12181 break;
12182 case LE:
12183 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12184 suffix = "le";
12185 break;
12186 case LEU:
12187 if (mode == CCmode)
12188 suffix = "be";
12189 else
12190 gcc_unreachable ();
12191 break;
12192 case UNORDERED:
12193 suffix = fp ? "u" : "p";
12194 break;
12195 case ORDERED:
12196 suffix = fp ? "nu" : "np";
12197 break;
12198 default:
12199 gcc_unreachable ();
12201 fputs (suffix, file);
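/* For instance, an EQ comparison against a flags register in CCZmode
   falls into the default case and prints the suffix "e", so a branch
   template using "j%C1" becomes "je"; the reversed variant requested
   by the 'c' operand code would print "ne" instead.  */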
12204 /* Print the name of register X to FILE based on its machine mode and number.
12205 If CODE is 'w', pretend the mode is HImode.
12206 If CODE is 'b', pretend the mode is QImode.
12207 If CODE is 'k', pretend the mode is SImode.
12208 If CODE is 'q', pretend the mode is DImode.
12209 If CODE is 'x', pretend the mode is V4SFmode.
12210 If CODE is 't', pretend the mode is V8SFmode.
12211 If CODE is 'g', pretend the mode is V16SFmode.
12212 If CODE is 'h', pretend the reg is the 'high' byte register.
12213 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12214 If CODE is 'd', duplicate the operand for AVX instruction.
12215 If CODE is 'V', print naked full integer register name without %.
12218 void
12219 print_reg (rtx x, int code, FILE *file)
12221 const char *reg;
12222 int msize;
12223 unsigned int regno;
12224 bool duplicated;
12226 if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
12227 putc ('%', file);
12229 if (x == pc_rtx)
12231 gcc_assert (TARGET_64BIT);
12232 fputs ("rip", file);
12233 return;
12236 if (code == 'y' && STACK_TOP_P (x))
12238 fputs ("st(0)", file);
12239 return;
12242 if (code == 'w')
12243 msize = 2;
12244 else if (code == 'b')
12245 msize = 1;
12246 else if (code == 'k')
12247 msize = 4;
12248 else if (code == 'q')
12249 msize = 8;
12250 else if (code == 'h')
12251 msize = 0;
12252 else if (code == 'x')
12253 msize = 16;
12254 else if (code == 't')
12255 msize = 32;
12256 else if (code == 'g')
12257 msize = 64;
12258 else
12259 msize = GET_MODE_SIZE (GET_MODE (x));
12261 regno = REGNO (x);
12263 if (regno == ARG_POINTER_REGNUM
12264 || regno == FRAME_POINTER_REGNUM
12265 || regno == FPSR_REG)
12267 output_operand_lossage
12268 ("invalid use of register '%s'", reg_names[regno]);
12269 return;
12271 else if (regno == FLAGS_REG)
12273 output_operand_lossage ("invalid use of asm flag output");
12274 return;
12277 if (code == 'V')
12279 if (GENERAL_REGNO_P (regno))
12280 msize = GET_MODE_SIZE (word_mode);
12281 else
12282 error ("%<V%> modifier on non-integer register");
12285 duplicated = code == 'd' && TARGET_AVX;
12287 switch (msize)
12289 case 16:
12290 case 12:
12291 case 8:
12292 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
12293 warning (0, "unsupported size for integer register");
12294 /* FALLTHRU */
12295 case 4:
12296 if (LEGACY_INT_REGNO_P (regno))
12297 putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
12298 /* FALLTHRU */
12299 case 2:
12300 normal:
12301 reg = hi_reg_name[regno];
12302 break;
12303 case 1:
12304 if (regno >= ARRAY_SIZE (qi_reg_name))
12305 goto normal;
12306 if (!ANY_QI_REGNO_P (regno))
12307 error ("unsupported size for integer register");
12308 reg = qi_reg_name[regno];
12309 break;
12310 case 0:
12311 if (regno >= ARRAY_SIZE (qi_high_reg_name))
12312 goto normal;
12313 reg = qi_high_reg_name[regno];
12314 break;
12315 case 32:
12316 case 64:
12317 if (SSE_REGNO_P (regno))
12319 gcc_assert (!duplicated);
12320 putc (msize == 32 ? 'y' : 'z', file);
12321 reg = hi_reg_name[regno] + 1;
12322 break;
12324 goto normal;
12325 default:
12326 gcc_unreachable ();
12329 fputs (reg, file);
12331 /* Irritatingly, AMD extended registers use
12332 different naming convention: "r%d[bwd]" */
12333 if (REX_INT_REGNO_P (regno))
12335 gcc_assert (TARGET_64BIT);
12336 switch (msize)
12338 case 0:
12339 error ("extended registers have no high halves");
12340 break;
12341 case 1:
12342 putc ('b', file);
12343 break;
12344 case 2:
12345 putc ('w', file);
12346 break;
12347 case 4:
12348 putc ('d', file);
12349 break;
12350 case 8:
12351 /* no suffix */
12352 break;
12353 default:
12354 error ("unsupported operand size for extended register");
12355 break;
12357 return;
12360 if (duplicated)
12362 if (ASSEMBLER_DIALECT == ASM_ATT)
12363 fprintf (file, ", %%%s", reg);
12364 else
12365 fprintf (file, ", %s", reg);
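/* As an example, for hard register %rax this prints "eax" with code
   'k', "ax" with 'w', "al" with 'b' and (on 64-bit targets) "rax"
   with 'q'; for an SSE register such as %xmm3, codes 't' and 'g'
   print "ymm3" and "zmm3" respectively.  */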
12369 /* Meaning of CODE:
12370 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12371 C -- print opcode suffix for set/cmov insn.
12372 c -- like C, but print reversed condition
12373 F,f -- likewise, but for floating-point.
12374 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12375 otherwise nothing
12376 R -- print embedded rounding and sae.
12377 r -- print only sae.
12378 z -- print the opcode suffix for the size of the current operand.
12379 Z -- likewise, with special suffixes for x87 instructions.
12380 * -- print a star (in certain assembler syntax)
12381 A -- print an absolute memory reference.
12382 E -- print address with DImode register names if TARGET_64BIT.
12383 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12384 s -- print a shift double count, followed by the assembler's argument
12385 delimiter.
12386 b -- print the QImode name of the register for the indicated operand.
12387 %b0 would print %al if operands[0] is reg 0.
12388 w -- likewise, print the HImode name of the register.
12389 k -- likewise, print the SImode name of the register.
12390 q -- likewise, print the DImode name of the register.
12391 x -- likewise, print the V4SFmode name of the register.
12392 t -- likewise, print the V8SFmode name of the register.
12393 g -- likewise, print the V16SFmode name of the register.
12394 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12395 y -- print "st(0)" instead of "st" as a register.
12396 d -- print duplicated register operand for AVX instruction.
12397 D -- print condition for SSE cmp instruction.
12398 P -- if PIC, print an @PLT suffix.
12399 p -- print raw symbol name.
12400 X -- don't print any sort of PIC '@' suffix for a symbol.
12401 & -- print some in-use local-dynamic symbol name.
12402 H -- print a memory address offset by 8; used for sse high-parts
12403 Y -- print condition for XOP pcom* instruction.
12404 V -- print naked full integer register name without %.
12405 + -- print a branch hint as 'cs' or 'ds' prefix
12406 ; -- print a semicolon (after prefixes due to bug in older gas).
12407 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
12408 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
12409 M -- print addr32 prefix for TARGET_X32 with VSIB address.
12410 ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
12411 N -- print maskz if it's constant 0 operand.
12414 void
12415 ix86_print_operand (FILE *file, rtx x, int code)
12417 if (code)
12419 switch (code)
12421 case 'A':
12422 switch (ASSEMBLER_DIALECT)
12424 case ASM_ATT:
12425 putc ('*', file);
12426 break;
12428 case ASM_INTEL:
12429 /* Intel syntax. For absolute addresses, registers should not
12430 be surrounded by brackets. */
12431 if (!REG_P (x))
12433 putc ('[', file);
12434 ix86_print_operand (file, x, 0);
12435 putc (']', file);
12436 return;
12438 break;
12440 default:
12441 gcc_unreachable ();
12444 ix86_print_operand (file, x, 0);
12445 return;
12447 case 'E':
12448 /* Wrap address in an UNSPEC to declare special handling. */
12449 if (TARGET_64BIT)
12450 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
12452 output_address (VOIDmode, x);
12453 return;
12455 case 'L':
12456 if (ASSEMBLER_DIALECT == ASM_ATT)
12457 putc ('l', file);
12458 return;
12460 case 'W':
12461 if (ASSEMBLER_DIALECT == ASM_ATT)
12462 putc ('w', file);
12463 return;
12465 case 'B':
12466 if (ASSEMBLER_DIALECT == ASM_ATT)
12467 putc ('b', file);
12468 return;
12470 case 'Q':
12471 if (ASSEMBLER_DIALECT == ASM_ATT)
12472 putc ('l', file);
12473 return;
12475 case 'S':
12476 if (ASSEMBLER_DIALECT == ASM_ATT)
12477 putc ('s', file);
12478 return;
12480 case 'T':
12481 if (ASSEMBLER_DIALECT == ASM_ATT)
12482 putc ('t', file);
12483 return;
12485 case 'O':
12486 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12487 if (ASSEMBLER_DIALECT != ASM_ATT)
12488 return;
12490 switch (GET_MODE_SIZE (GET_MODE (x)))
12492 case 2:
12493 putc ('w', file);
12494 break;
12496 case 4:
12497 putc ('l', file);
12498 break;
12500 case 8:
12501 putc ('q', file);
12502 break;
12504 default:
12505 output_operand_lossage ("invalid operand size for operand "
12506 "code 'O'");
12507 return;
12510 putc ('.', file);
12511 #endif
12512 return;
12514 case 'z':
12515 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12517 /* Opcodes don't get size suffixes if using Intel syntax. */
12518 if (ASSEMBLER_DIALECT == ASM_INTEL)
12519 return;
12521 switch (GET_MODE_SIZE (GET_MODE (x)))
12523 case 1:
12524 putc ('b', file);
12525 return;
12527 case 2:
12528 putc ('w', file);
12529 return;
12531 case 4:
12532 putc ('l', file);
12533 return;
12535 case 8:
12536 putc ('q', file);
12537 return;
12539 default:
12540 output_operand_lossage ("invalid operand size for operand "
12541 "code 'z'");
12542 return;
12546 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12547 warning (0, "non-integer operand used with operand code %<z%>");
12548 /* FALLTHRU */
12550 case 'Z':
12551 /* 387 opcodes don't get size suffixes if using Intel syntax. */
12552 if (ASSEMBLER_DIALECT == ASM_INTEL)
12553 return;
12555 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12557 switch (GET_MODE_SIZE (GET_MODE (x)))
12559 case 2:
12560 #ifdef HAVE_AS_IX86_FILDS
12561 putc ('s', file);
12562 #endif
12563 return;
12565 case 4:
12566 putc ('l', file);
12567 return;
12569 case 8:
12570 #ifdef HAVE_AS_IX86_FILDQ
12571 putc ('q', file);
12572 #else
12573 fputs ("ll", file);
12574 #endif
12575 return;
12577 default:
12578 break;
12581 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12583 /* 387 opcodes don't get size suffixes
12584 if the operands are registers. */
12585 if (STACK_REG_P (x))
12586 return;
12588 switch (GET_MODE_SIZE (GET_MODE (x)))
12590 case 4:
12591 putc ('s', file);
12592 return;
12594 case 8:
12595 putc ('l', file);
12596 return;
12598 case 12:
12599 case 16:
12600 putc ('t', file);
12601 return;
12603 default:
12604 break;
12607 else
12609 output_operand_lossage ("invalid operand type used with "
12610 "operand code 'Z'");
12611 return;
12614 output_operand_lossage ("invalid operand size for operand code 'Z'");
12615 return;
12617 case 'd':
12618 case 'b':
12619 case 'w':
12620 case 'k':
12621 case 'q':
12622 case 'h':
12623 case 't':
12624 case 'g':
12625 case 'y':
12626 case 'x':
12627 case 'X':
12628 case 'P':
12629 case 'p':
12630 case 'V':
12631 break;
12633 case 's':
12634 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12636 ix86_print_operand (file, x, 0);
12637 fputs (", ", file);
12639 return;
12641 case 'Y':
12642 switch (GET_CODE (x))
12644 case NE:
12645 fputs ("neq", file);
12646 break;
12647 case EQ:
12648 fputs ("eq", file);
12649 break;
12650 case GE:
12651 case GEU:
12652 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
12653 break;
12654 case GT:
12655 case GTU:
12656 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
12657 break;
12658 case LE:
12659 case LEU:
12660 fputs ("le", file);
12661 break;
12662 case LT:
12663 case LTU:
12664 fputs ("lt", file);
12665 break;
12666 case UNORDERED:
12667 fputs ("unord", file);
12668 break;
12669 case ORDERED:
12670 fputs ("ord", file);
12671 break;
12672 case UNEQ:
12673 fputs ("ueq", file);
12674 break;
12675 case UNGE:
12676 fputs ("nlt", file);
12677 break;
12678 case UNGT:
12679 fputs ("nle", file);
12680 break;
12681 case UNLE:
12682 fputs ("ule", file);
12683 break;
12684 case UNLT:
12685 fputs ("ult", file);
12686 break;
12687 case LTGT:
12688 fputs ("une", file);
12689 break;
12690 default:
12691 output_operand_lossage ("operand is not a condition code, "
12692 "invalid operand code 'Y'");
12693 return;
12695 return;
12697 case 'D':
12698 /* A little bit of brain damage here. The SSE compare instructions
12699 use completely different names for the comparisons than the
12700 fp conditional moves do. */
12701 switch (GET_CODE (x))
12703 case UNEQ:
12704 if (TARGET_AVX)
12706 fputs ("eq_us", file);
12707 break;
12709 /* FALLTHRU */
12710 case EQ:
12711 fputs ("eq", file);
12712 break;
12713 case UNLT:
12714 if (TARGET_AVX)
12716 fputs ("nge", file);
12717 break;
12719 /* FALLTHRU */
12720 case LT:
12721 fputs ("lt", file);
12722 break;
12723 case UNLE:
12724 if (TARGET_AVX)
12726 fputs ("ngt", file);
12727 break;
12729 /* FALLTHRU */
12730 case LE:
12731 fputs ("le", file);
12732 break;
12733 case UNORDERED:
12734 fputs ("unord", file);
12735 break;
12736 case LTGT:
12737 if (TARGET_AVX)
12739 fputs ("neq_oq", file);
12740 break;
12742 /* FALLTHRU */
12743 case NE:
12744 fputs ("neq", file);
12745 break;
12746 case GE:
12747 if (TARGET_AVX)
12749 fputs ("ge", file);
12750 break;
12752 /* FALLTHRU */
12753 case UNGE:
12754 fputs ("nlt", file);
12755 break;
12756 case GT:
12757 if (TARGET_AVX)
12759 fputs ("gt", file);
12760 break;
12762 /* FALLTHRU */
12763 case UNGT:
12764 fputs ("nle", file);
12765 break;
12766 case ORDERED:
12767 fputs ("ord", file);
12768 break;
12769 default:
12770 output_operand_lossage ("operand is not a condition code, "
12771 "invalid operand code 'D'");
12772 return;
12774 return;
12776 case 'F':
12777 case 'f':
12778 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12779 if (ASSEMBLER_DIALECT == ASM_ATT)
12780 putc ('.', file);
12781 gcc_fallthrough ();
12782 #endif
12784 case 'C':
12785 case 'c':
12786 if (!COMPARISON_P (x))
12788 output_operand_lossage ("operand is not a condition code, "
12789 "invalid operand code '%c'", code);
12790 return;
12792 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
12793 code == 'c' || code == 'f',
12794 code == 'F' || code == 'f',
12795 file);
12796 return;
12798 case 'H':
12799 if (!offsettable_memref_p (x))
12801 output_operand_lossage ("operand is not an offsettable memory "
12802 "reference, invalid operand code 'H'");
12803 return;
12805 /* It doesn't actually matter what mode we use here, as we're
12806 only going to use this for printing. */
12807 x = adjust_address_nv (x, DImode, 8);
12808 /* Output 'qword ptr' for intel assembler dialect. */
12809 if (ASSEMBLER_DIALECT == ASM_INTEL)
12810 code = 'q';
12811 break;
12813 case 'K':
12814 if (!CONST_INT_P (x))
12816 output_operand_lossage ("operand is not an integer, invalid "
12817 "operand code 'K'");
12818 return;
12821 if (INTVAL (x) & IX86_HLE_ACQUIRE)
12822 #ifdef HAVE_AS_IX86_HLE
12823 fputs ("xacquire ", file);
12824 #else
12825 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
12826 #endif
12827 else if (INTVAL (x) & IX86_HLE_RELEASE)
12828 #ifdef HAVE_AS_IX86_HLE
12829 fputs ("xrelease ", file);
12830 #else
12831 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
12832 #endif
12833 /* We do not want to print the value of the operand. */
12834 return;
12836 case 'N':
12837 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
12838 fputs ("{z}", file);
12839 return;
12841 case 'r':
12842 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
12844 output_operand_lossage ("operand is not a specific integer, "
12845 "invalid operand code 'r'");
12846 return;
12849 if (ASSEMBLER_DIALECT == ASM_INTEL)
12850 fputs (", ", file);
12852 fputs ("{sae}", file);
12854 if (ASSEMBLER_DIALECT == ASM_ATT)
12855 fputs (", ", file);
12857 return;
12859 case 'R':
12860 if (!CONST_INT_P (x))
12862 output_operand_lossage ("operand is not an integer, invalid "
12863 "operand code 'R'");
12864 return;
12867 if (ASSEMBLER_DIALECT == ASM_INTEL)
12868 fputs (", ", file);
12870 switch (INTVAL (x))
12872 case ROUND_NEAREST_INT | ROUND_SAE:
12873 fputs ("{rn-sae}", file);
12874 break;
12875 case ROUND_NEG_INF | ROUND_SAE:
12876 fputs ("{rd-sae}", file);
12877 break;
12878 case ROUND_POS_INF | ROUND_SAE:
12879 fputs ("{ru-sae}", file);
12880 break;
12881 case ROUND_ZERO | ROUND_SAE:
12882 fputs ("{rz-sae}", file);
12883 break;
12884 default:
12885 output_operand_lossage ("operand is not a specific integer, "
12886 "invalid operand code 'R'");
12889 if (ASSEMBLER_DIALECT == ASM_ATT)
12890 fputs (", ", file);
12892 return;
12894 case '*':
12895 if (ASSEMBLER_DIALECT == ASM_ATT)
12896 putc ('*', file);
12897 return;
12899 case '&':
12901 const char *name = get_some_local_dynamic_name ();
12902 if (name == NULL)
12903 output_operand_lossage ("'%%&' used without any "
12904 "local dynamic TLS references");
12905 else
12906 assemble_name (file, name);
12907 return;
12910 case '+':
12912 rtx x;
12914 if (!optimize
12915 || optimize_function_for_size_p (cfun)
12916 || !TARGET_BRANCH_PREDICTION_HINTS)
12917 return;
12919 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
12920 if (x)
12922 int pred_val = profile_probability::from_reg_br_prob_note
12923 (XINT (x, 0)).to_reg_br_prob_base ();
12925 if (pred_val < REG_BR_PROB_BASE * 45 / 100
12926 || pred_val > REG_BR_PROB_BASE * 55 / 100)
12928 bool taken = pred_val > REG_BR_PROB_BASE / 2;
12929 bool cputaken
12930 = final_forward_branch_p (current_output_insn) == 0;
12932 /* Emit hints only when the default branch prediction
12933 heuristics would fail. */
12934 if (taken != cputaken)
12936 /* We use 3e (DS) prefix for taken branches and
12937 2e (CS) prefix for not taken branches. */
12938 if (taken)
12939 fputs ("ds ; ", file);
12940 else
12941 fputs ("cs ; ", file);
12945 return;
12948 case ';':
12949 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
12950 putc (';', file);
12951 #endif
12952 return;
12954 case '~':
12955 putc (TARGET_AVX2 ? 'i' : 'f', file);
12956 return;
12958 case 'M':
12959 if (TARGET_X32)
12961 /* NB: 32-bit indices in VSIB address are sign-extended
12962 to 64 bits. In x32, if 32-bit address 0xf7fa3010 is
12963 sign-extended to 0xfffffffff7fa3010 which is invalid
12964 address. Add addr32 prefix if there is no base
12965 register nor symbol. */
12966 bool ok;
12967 struct ix86_address parts;
12968 ok = ix86_decompose_address (x, &parts);
12969 gcc_assert (ok && parts.index == NULL_RTX);
12970 if (parts.base == NULL_RTX
12971 && (parts.disp == NULL_RTX
12972 || !symbolic_operand (parts.disp,
12973 GET_MODE (parts.disp))))
12974 fputs ("addr32 ", file);
12976 return;
12978 case '^':
12979 if (TARGET_64BIT && Pmode != word_mode)
12980 fputs ("addr32 ", file);
12981 return;
12983 case '!':
12984 if (ix86_notrack_prefixed_insn_p (current_output_insn))
12985 fputs ("notrack ", file);
12986 return;
12988 default:
12989 output_operand_lossage ("invalid operand code '%c'", code);
12993 if (REG_P (x))
12994 print_reg (x, code, file);
12996 else if (MEM_P (x))
12998 rtx addr = XEXP (x, 0);
13000 /* No `byte ptr' prefix for call instructions ... */
13001 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
13003 machine_mode mode = GET_MODE (x);
13004 const char *size;
13006 /* Check for explicit size override codes. */
13007 if (code == 'b')
13008 size = "BYTE";
13009 else if (code == 'w')
13010 size = "WORD";
13011 else if (code == 'k')
13012 size = "DWORD";
13013 else if (code == 'q')
13014 size = "QWORD";
13015 else if (code == 'x')
13016 size = "XMMWORD";
13017 else if (code == 't')
13018 size = "YMMWORD";
13019 else if (code == 'g')
13020 size = "ZMMWORD";
13021 else if (mode == BLKmode)
13022 /* ... or BLKmode operands, when not overridden. */
13023 size = NULL;
13024 else
13025 switch (GET_MODE_SIZE (mode))
13027 case 1: size = "BYTE"; break;
13028 case 2: size = "WORD"; break;
13029 case 4: size = "DWORD"; break;
13030 case 8: size = "QWORD"; break;
13031 case 12: size = "TBYTE"; break;
13032 case 16:
13033 if (mode == XFmode)
13034 size = "TBYTE";
13035 else
13036 size = "XMMWORD";
13037 break;
13038 case 32: size = "YMMWORD"; break;
13039 case 64: size = "ZMMWORD"; break;
13040 default:
13041 gcc_unreachable ();
13043 if (size)
13045 fputs (size, file);
13046 fputs (" PTR ", file);
13050 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
13051 output_operand_lossage ("invalid constraints for operand");
13052 else
13053 ix86_print_operand_address_as
13054 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
13057 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
13059 long l;
13061 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
13063 if (ASSEMBLER_DIALECT == ASM_ATT)
13064 putc ('$', file);
13065 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13066 if (code == 'q')
13067 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
13068 (unsigned long long) (int) l);
13069 else
13070 fprintf (file, "0x%08x", (unsigned int) l);
13073 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
13075 long l[2];
13077 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
13079 if (ASSEMBLER_DIALECT == ASM_ATT)
13080 putc ('$', file);
13081 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
13084 /* These float cases don't actually occur as immediate operands. */
13085 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
13087 char dstr[30];
13089 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13090 fputs (dstr, file);
13093 else
13095 /* We have patterns that allow zero sets of memory, for instance.
13096 In 64-bit mode, we should probably support all 8-byte vectors,
13097 since we can in fact encode that into an immediate. */
13098 if (GET_CODE (x) == CONST_VECTOR)
13100 if (x != CONST0_RTX (GET_MODE (x)))
13101 output_operand_lossage ("invalid vector immediate");
13102 x = const0_rtx;
13105 if (code != 'P' && code != 'p')
13107 if (CONST_INT_P (x))
13109 if (ASSEMBLER_DIALECT == ASM_ATT)
13110 putc ('$', file);
13112 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13113 || GET_CODE (x) == LABEL_REF)
13115 if (ASSEMBLER_DIALECT == ASM_ATT)
13116 putc ('$', file);
13117 else
13118 fputs ("OFFSET FLAT:", file);
13121 if (CONST_INT_P (x))
13122 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13123 else if (flag_pic || MACHOPIC_INDIRECT)
13124 output_pic_addr_const (file, x, code);
13125 else
13126 output_addr_const (file, x);
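/* As a rough example of how the operand codes above are used in
   machine description templates, an output pattern like

     "cmov%O2%C1\t{%2, %0|%0, %2}"

   prints the optional Sun-syntax size suffix for operand 2 and the
   condition suffix for operand 1, yielding e.g. "cmovge %eax, %edx"
   in AT&T syntax.  */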
13130 static bool
13131 ix86_print_operand_punct_valid_p (unsigned char code)
13133 return (code == '*' || code == '+' || code == '&' || code == ';'
13134 || code == '~' || code == '^' || code == '!');
13137 /* Print a memory operand whose address is ADDR. */
13139 static void
13140 ix86_print_operand_address_as (FILE *file, rtx addr,
13141 addr_space_t as, bool no_rip)
13143 struct ix86_address parts;
13144 rtx base, index, disp;
13145 int scale;
13146 int ok;
13147 bool vsib = false;
13148 int code = 0;
13150 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
13152 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
13153 gcc_assert (parts.index == NULL_RTX);
13154 parts.index = XVECEXP (addr, 0, 1);
13155 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
13156 addr = XVECEXP (addr, 0, 0);
13157 vsib = true;
13159 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
13161 gcc_assert (TARGET_64BIT);
13162 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
13163 code = 'q';
13165 else
13166 ok = ix86_decompose_address (addr, &parts);
13168 gcc_assert (ok);
13170 base = parts.base;
13171 index = parts.index;
13172 disp = parts.disp;
13173 scale = parts.scale;
13175 if (ADDR_SPACE_GENERIC_P (as))
13176 as = parts.seg;
13177 else
13178 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
13180 if (!ADDR_SPACE_GENERIC_P (as))
13182 if (ASSEMBLER_DIALECT == ASM_ATT)
13183 putc ('%', file);
13185 switch (as)
13187 case ADDR_SPACE_SEG_FS:
13188 fputs ("fs:", file);
13189 break;
13190 case ADDR_SPACE_SEG_GS:
13191 fputs ("gs:", file);
13192 break;
13193 default:
13194 gcc_unreachable ();
13198 /* Use one byte shorter RIP relative addressing for 64bit mode. */
13199 if (TARGET_64BIT && !base && !index && !no_rip)
13201 rtx symbol = disp;
13203 if (GET_CODE (disp) == CONST
13204 && GET_CODE (XEXP (disp, 0)) == PLUS
13205 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13206 symbol = XEXP (XEXP (disp, 0), 0);
13208 if (GET_CODE (symbol) == LABEL_REF
13209 || (GET_CODE (symbol) == SYMBOL_REF
13210 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13211 base = pc_rtx;
13214 if (!base && !index)
13216 /* Displacement only requires special attention. */
13217 if (CONST_INT_P (disp))
13219 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
13220 fputs ("ds:", file);
13221 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13223 /* Load the external function address via the GOT slot to avoid PLT. */
13224 else if (GET_CODE (disp) == CONST
13225 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13226 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
13227 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
13228 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
13229 output_pic_addr_const (file, disp, 0);
13230 else if (flag_pic)
13231 output_pic_addr_const (file, disp, 0);
13232 else
13233 output_addr_const (file, disp);
13235 else
13237 /* Print SImode register names to force addr32 prefix. */
13238 if (SImode_address_operand (addr, VOIDmode))
13240 if (flag_checking)
13242 gcc_assert (TARGET_64BIT);
13243 switch (GET_CODE (addr))
13245 case SUBREG:
13246 gcc_assert (GET_MODE (addr) == SImode);
13247 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
13248 break;
13249 case ZERO_EXTEND:
13250 case AND:
13251 gcc_assert (GET_MODE (addr) == DImode);
13252 break;
13253 default:
13254 gcc_unreachable ();
13257 gcc_assert (!code);
13258 code = 'k';
13260 else if (code == 0
13261 && TARGET_X32
13262 && disp
13263 && CONST_INT_P (disp)
13264 && INTVAL (disp) < -16*1024*1024)
13266 /* X32 runs in 64-bit mode, where displacement, DISP, in
13267 address DISP(%r64), is encoded as 32-bit immediate sign-
13268 extended from 32-bit to 64-bit. For -0x40000300(%r64),
13269 address is %r64 + 0xffffffffbffffd00. When %r64 <
13270 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
13271 which is invalid for x32. The correct address is %r64
13272 - 0x40000300 == 0xf7ffdd64. To properly encode
13273 -0x40000300(%r64) for x32, we zero-extend negative
13274 displacement by forcing addr32 prefix which truncates
13275 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
13276 zero-extend all negative displacements, including -1(%rsp).
13277 However, for small negative displacements, sign-extension
13278 won't cause overflow. We only zero-extend negative
13279 displacements if they < -16*1024*1024, which is also used
13280 to check legitimate address displacements for PIC. */
13281 code = 'k';
13284 /* Since the upper 32 bits of RSP are always zero for x32,
13285 we can encode %esp as %rsp to avoid 0x67 prefix if
13286 there is no index register. */
13287 if (TARGET_X32 && Pmode == SImode
13288 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
13289 code = 'q';
13291 if (ASSEMBLER_DIALECT == ASM_ATT)
13293 if (disp)
13295 if (flag_pic)
13296 output_pic_addr_const (file, disp, 0);
13297 else if (GET_CODE (disp) == LABEL_REF)
13298 output_asm_label (disp);
13299 else
13300 output_addr_const (file, disp);
13303 putc ('(', file);
13304 if (base)
13305 print_reg (base, code, file);
13306 if (index)
13308 putc (',', file);
13309 print_reg (index, vsib ? 0 : code, file);
13310 if (scale != 1 || vsib)
13311 fprintf (file, ",%d", scale);
13313 putc (')', file);
13315 else
13317 rtx offset = NULL_RTX;
13319 if (disp)
13321 /* Pull out the offset of a symbol; print any symbol itself. */
13322 if (GET_CODE (disp) == CONST
13323 && GET_CODE (XEXP (disp, 0)) == PLUS
13324 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13326 offset = XEXP (XEXP (disp, 0), 1);
13327 disp = gen_rtx_CONST (VOIDmode,
13328 XEXP (XEXP (disp, 0), 0));
13331 if (flag_pic)
13332 output_pic_addr_const (file, disp, 0);
13333 else if (GET_CODE (disp) == LABEL_REF)
13334 output_asm_label (disp);
13335 else if (CONST_INT_P (disp))
13336 offset = disp;
13337 else
13338 output_addr_const (file, disp);
13341 putc ('[', file);
13342 if (base)
13344 print_reg (base, code, file);
13345 if (offset)
13347 if (INTVAL (offset) >= 0)
13348 putc ('+', file);
13349 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13352 else if (offset)
13353 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13354 else
13355 putc ('0', file);
13357 if (index)
13359 putc ('+', file);
13360 print_reg (index, vsib ? 0 : code, file);
13361 if (scale != 1 || vsib)
13362 fprintf (file, "*%d", scale);
13364 putc (']', file);
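/* As an illustration, the same base/index/displacement address prints as

     -4(%rbp,%rax,4)   in AT&T syntax, and
     [rbp-4+rax*4]     in Intel syntax,

   with a segment override such as %fs: (or fs: for Intel) emitted
   first when the address space is not generic.  */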
13369 static void
13370 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
13372 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
13375 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13377 static bool
13378 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13380 rtx op;
13382 if (GET_CODE (x) != UNSPEC)
13383 return false;
13385 op = XVECEXP (x, 0, 0);
13386 switch (XINT (x, 1))
13388 case UNSPEC_GOTOFF:
13389 output_addr_const (file, op);
13390 fputs ("@gotoff", file);
13391 break;
13392 case UNSPEC_GOTTPOFF:
13393 output_addr_const (file, op);
13394 /* FIXME: This might be @TPOFF in Sun ld. */
13395 fputs ("@gottpoff", file);
13396 break;
13397 case UNSPEC_TPOFF:
13398 output_addr_const (file, op);
13399 fputs ("@tpoff", file);
13400 break;
13401 case UNSPEC_NTPOFF:
13402 output_addr_const (file, op);
13403 if (TARGET_64BIT)
13404 fputs ("@tpoff", file);
13405 else
13406 fputs ("@ntpoff", file);
13407 break;
13408 case UNSPEC_DTPOFF:
13409 output_addr_const (file, op);
13410 fputs ("@dtpoff", file);
13411 break;
13412 case UNSPEC_GOTNTPOFF:
13413 output_addr_const (file, op);
13414 if (TARGET_64BIT)
13415 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13416 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13417 else
13418 fputs ("@gotntpoff", file);
13419 break;
13420 case UNSPEC_INDNTPOFF:
13421 output_addr_const (file, op);
13422 fputs ("@indntpoff", file);
13423 break;
13424 #if TARGET_MACHO
13425 case UNSPEC_MACHOPIC_OFFSET:
13426 output_addr_const (file, op);
13427 putc ('-', file);
13428 machopic_output_function_base_name (file);
13429 break;
13430 #endif
13432 default:
13433 return false;
13436 return true;
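/* As an illustration, a constant wrapped in UNSPEC_NTPOFF is emitted
   here as "foo@ntpoff" (or "foo@tpoff" on 64-bit targets), matching
   the suffixes printed by output_pic_addr_const above.  */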
13440 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13441 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13442 is the expression of the binary operation. The output may either be
13443 emitted here, or returned to the caller, like all output_* functions.
13445 There is no guarantee that the operands are the same mode, as they
13446 might be within FLOAT or FLOAT_EXTEND expressions. */
13448 #ifndef SYSV386_COMPAT
13449 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13450 wants to fix the assemblers because that causes incompatibility
13451 with gcc. No-one wants to fix gcc because that causes
13452 incompatibility with assemblers... You can use the option of
13453 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13454 #define SYSV386_COMPAT 1
13455 #endif
13457 const char *
13458 output_387_binary_op (rtx_insn *insn, rtx *operands)
13460 static char buf[40];
13461 const char *p;
13462 bool is_sse
13463 = (SSE_REG_P (operands[0])
13464 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
13466 if (is_sse)
13467 p = "%v";
13468 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13469 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13470 p = "fi";
13471 else
13472 p = "f";
13474 strcpy (buf, p);
13476 switch (GET_CODE (operands[3]))
13478 case PLUS:
13479 p = "add"; break;
13480 case MINUS:
13481 p = "sub"; break;
13482 case MULT:
13483 p = "mul"; break;
13484 case DIV:
13485 p = "div"; break;
13486 default:
13487 gcc_unreachable ();
13490 strcat (buf, p);
13492 if (is_sse)
13494 p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd";
13495 strcat (buf, p);
13497 if (TARGET_AVX)
13498 p = "\t{%2, %1, %0|%0, %1, %2}";
13499 else
13500 p = "\t{%2, %0|%0, %2}";
13502 strcat (buf, p);
13503 return buf;
13506 /* Even if we do not want to check the inputs, this documents the
13507 input constraints, which helps in understanding the following code. */
13508 if (flag_checking)
13510 if (STACK_REG_P (operands[0])
13511 && ((REG_P (operands[1])
13512 && REGNO (operands[0]) == REGNO (operands[1])
13513 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13514 || (REG_P (operands[2])
13515 && REGNO (operands[0]) == REGNO (operands[2])
13516 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13517 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13518 ; /* ok */
13519 else
13520 gcc_unreachable ();
13523 switch (GET_CODE (operands[3]))
13525 case MULT:
13526 case PLUS:
13527 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13528 std::swap (operands[1], operands[2]);
13530 /* We now know operands[0] == operands[1]. */
13532 if (MEM_P (operands[2]))
13534 p = "%Z2\t%2";
13535 break;
13538 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13540 if (STACK_TOP_P (operands[0]))
13541 /* How is it that we are storing to a dead operand[2]?
13542 Well, presumably operands[1] is dead too. We can't
13543 store the result to st(0) as st(0) gets popped on this
13544 instruction. Instead store to operands[2] (which I
13545 think has to be st(1)). st(1) will be popped later.
13546 gcc <= 2.8.1 didn't have this check and generated
13547 assembly code that the Unixware assembler rejected. */
13548 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13549 else
13550 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13551 break;
13554 if (STACK_TOP_P (operands[0]))
13555 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13556 else
13557 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13558 break;
13560 case MINUS:
13561 case DIV:
13562 if (MEM_P (operands[1]))
13564 p = "r%Z1\t%1";
13565 break;
13568 if (MEM_P (operands[2]))
13570 p = "%Z2\t%2";
13571 break;
13574 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13576 #if SYSV386_COMPAT
13577 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13578 derived assemblers, confusingly reverse the direction of
13579 the operation for fsub{r} and fdiv{r} when the
13580 destination register is not st(0). The Intel assembler
13581 doesn't have this brain damage. Read !SYSV386_COMPAT to
13582 figure out what the hardware really does. */
13583 if (STACK_TOP_P (operands[0]))
13584 p = "{p\t%0, %2|rp\t%2, %0}";
13585 else
13586 p = "{rp\t%2, %0|p\t%0, %2}";
13587 #else
13588 if (STACK_TOP_P (operands[0]))
13589 /* As above for fmul/fadd, we can't store to st(0). */
13590 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13591 else
13592 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13593 #endif
13594 break;
13597 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13599 #if SYSV386_COMPAT
13600 if (STACK_TOP_P (operands[0]))
13601 p = "{rp\t%0, %1|p\t%1, %0}";
13602 else
13603 p = "{p\t%1, %0|rp\t%0, %1}";
13604 #else
13605 if (STACK_TOP_P (operands[0]))
13606 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13607 else
13608 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13609 #endif
13610 break;
13613 if (STACK_TOP_P (operands[0]))
13615 if (STACK_TOP_P (operands[1]))
13616 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13617 else
13618 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13619 break;
13621 else if (STACK_TOP_P (operands[1]))
13623 #if SYSV386_COMPAT
13624 p = "{\t%1, %0|r\t%0, %1}";
13625 #else
13626 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13627 #endif
13629 else
13631 #if SYSV386_COMPAT
13632 p = "{r\t%2, %0|\t%0, %2}";
13633 #else
13634 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13635 #endif
13637 break;
13639 default:
13640 gcc_unreachable ();
13643 strcat (buf, p);
13644 return buf;
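/* As a sketch: for st(0) = st(0) + st(1) with no REG_DEAD note this
   returns "fadd\t{%y2, %0|%0, %y2}", which assembles to
   "fadd %st(1), %st" in AT&T syntax; if operands[2] dies, the popping
   form "faddp" is selected instead.  */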
13647 /* Return needed mode for entity in optimize_mode_switching pass. */
13649 static int
13650 ix86_dirflag_mode_needed (rtx_insn *insn)
13652 if (CALL_P (insn))
13654 if (cfun->machine->func_type == TYPE_NORMAL)
13655 return X86_DIRFLAG_ANY;
13656 else
13657 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
13658 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
13661 if (recog_memoized (insn) < 0)
13662 return X86_DIRFLAG_ANY;
13664 if (get_attr_type (insn) == TYPE_STR)
13666 /* Emit cld instruction if stringops are used in the function. */
13667 if (cfun->machine->func_type == TYPE_NORMAL)
13668 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
13669 else
13670 return X86_DIRFLAG_RESET;
13673 return X86_DIRFLAG_ANY;
13676 /* Check if a 256bit or 512bit AVX register is referenced inside of EXP. */
13678 static bool
13679 ix86_check_avx_upper_register (const_rtx exp)
13681 return (SSE_REG_P (exp)
13682 && !EXT_REX_SSE_REG_P (exp)
13683 && GET_MODE_BITSIZE (GET_MODE (exp)) > 128);
13686 /* Return needed mode for entity in optimize_mode_switching pass. */
13688 static int
13689 ix86_avx_u128_mode_needed (rtx_insn *insn)
13691 if (CALL_P (insn))
13693 rtx link;
13695 /* Needed mode is set to AVX_U128_CLEAN if there are
13696 no 256bit or 512bit modes used in function arguments. */
13697 for (link = CALL_INSN_FUNCTION_USAGE (insn);
13698 link;
13699 link = XEXP (link, 1))
13701 if (GET_CODE (XEXP (link, 0)) == USE)
13703 rtx arg = XEXP (XEXP (link, 0), 0);
13705 if (ix86_check_avx_upper_register (arg))
13706 return AVX_U128_DIRTY;
13710 /* If the function is known to preserve some SSE registers,
13711 RA and previous passes can legitimately rely on that for
13712 modes wider than 256 bits. It's only safe to issue a
13713 vzeroupper if all SSE registers are clobbered. */
13714 const function_abi &abi = insn_callee_abi (insn);
13715 if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
13716 abi.mode_clobbers (V4DImode)))
13717 return AVX_U128_ANY;
13719 return AVX_U128_CLEAN;
13722 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
13723 Hardware changes state only when a 256bit register is written to,
13724 but we need to prevent the compiler from moving the optimal insertion
13725 point above an eventual read from a 256bit or 512bit register. */
13726 subrtx_iterator::array_type array;
13727 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
13728 if (ix86_check_avx_upper_register (*iter))
13729 return AVX_U128_DIRTY;
13731 return AVX_U128_ANY;
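/* Roughly speaking, for a call such as

     __m256 v = ...;
     foo (v);

   the needed mode is AVX_U128_DIRTY because a 256-bit argument is
   passed, whereas a call that clobbers all SSE registers and passes
   no such argument needs AVX_U128_CLEAN, which is what lets the
   mode-switching pass place a vzeroupper before it.  */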
13734 /* Return mode that i387 must be switched into
13735 prior to the execution of insn. */
13737 static int
13738 ix86_i387_mode_needed (int entity, rtx_insn *insn)
13740 enum attr_i387_cw mode;
13742 /* The mode UNINITIALIZED is used to store the control word after a
13743 function call or ASM pattern. The mode ANY specifies that the
13744 function has no requirements on the control word and makes no
13745 changes in the bits we are interested in. */
13747 if (CALL_P (insn)
13748 || (NONJUMP_INSN_P (insn)
13749 && (asm_noperands (PATTERN (insn)) >= 0
13750 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13751 return I387_CW_UNINITIALIZED;
13753 if (recog_memoized (insn) < 0)
13754 return I387_CW_ANY;
13756 mode = get_attr_i387_cw (insn);
13758 switch (entity)
13760 case I387_ROUNDEVEN:
13761 if (mode == I387_CW_ROUNDEVEN)
13762 return mode;
13763 break;
13765 case I387_TRUNC:
13766 if (mode == I387_CW_TRUNC)
13767 return mode;
13768 break;
13770 case I387_FLOOR:
13771 if (mode == I387_CW_FLOOR)
13772 return mode;
13773 break;
13775 case I387_CEIL:
13776 if (mode == I387_CW_CEIL)
13777 return mode;
13778 break;
13780 default:
13781 gcc_unreachable ();
13784 return I387_CW_ANY;
13787 /* Return mode that entity must be switched into
13788 prior to the execution of insn. */
13790 static int
13791 ix86_mode_needed (int entity, rtx_insn *insn)
13793 switch (entity)
13795 case X86_DIRFLAG:
13796 return ix86_dirflag_mode_needed (insn);
13797 case AVX_U128:
13798 return ix86_avx_u128_mode_needed (insn);
13799 case I387_ROUNDEVEN:
13800 case I387_TRUNC:
13801 case I387_FLOOR:
13802 case I387_CEIL:
13803 return ix86_i387_mode_needed (entity, insn);
13804 default:
13805 gcc_unreachable ();
13807 return 0;
13810 /* Check if a 256bit or 512bit AVX register is referenced in stores. */
13812 static void
13813 ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
13815 if (ix86_check_avx_upper_register (dest))
13817 bool *used = (bool *) data;
13818 *used = true;
13822 /* Calculate mode of upper 128bit AVX registers after the insn. */
13824 static int
13825 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
13827 rtx pat = PATTERN (insn);
13829 if (vzeroupper_pattern (pat, VOIDmode)
13830 || vzeroall_pattern (pat, VOIDmode))
13831 return AVX_U128_CLEAN;
13833 /* We know that the state is clean after a CALL insn if the function
13834 return value does not use a 256bit or 512bit register. */
13835 if (CALL_P (insn))
13837 bool avx_upper_reg_found = false;
13838 note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
13840 return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
13843 /* Otherwise, return the current mode. Remember that if the insn
13844 references AVX 256bit or 512bit registers, the mode was already
13845 changed to DIRTY by MODE_NEEDED. */
13846 return mode;
13849 /* Return the mode that an insn results in. */
13851 static int
13852 ix86_mode_after (int entity, int mode, rtx_insn *insn)
13854 switch (entity)
13856 case X86_DIRFLAG:
13857 return mode;
13858 case AVX_U128:
13859 return ix86_avx_u128_mode_after (mode, insn);
13860 case I387_ROUNDEVEN:
13861 case I387_TRUNC:
13862 case I387_FLOOR:
13863 case I387_CEIL:
13864 return mode;
13865 default:
13866 gcc_unreachable ();
13870 static int
13871 ix86_dirflag_mode_entry (void)
13873 /* For TARGET_CLD or in the interrupt handler we can't assume
13874 direction flag state at function entry. */
13875 if (TARGET_CLD
13876 || cfun->machine->func_type != TYPE_NORMAL)
13877 return X86_DIRFLAG_ANY;
13879 return X86_DIRFLAG_RESET;
13882 static int
13883 ix86_avx_u128_mode_entry (void)
13885 tree arg;
13887 /* Entry mode is set to AVX_U128_DIRTY if there are
13888 256bit or 512bit modes used in function arguments. */
13889 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
13890 arg = TREE_CHAIN (arg))
13892 rtx incoming = DECL_INCOMING_RTL (arg);
13894 if (incoming && ix86_check_avx_upper_register (incoming))
13895 return AVX_U128_DIRTY;
13898 return AVX_U128_CLEAN;
13901 /* Return a mode that ENTITY is assumed to be
13902 switched to at function entry. */
13904 static int
13905 ix86_mode_entry (int entity)
13907 switch (entity)
13909 case X86_DIRFLAG:
13910 return ix86_dirflag_mode_entry ();
13911 case AVX_U128:
13912 return ix86_avx_u128_mode_entry ();
13913 case I387_ROUNDEVEN:
13914 case I387_TRUNC:
13915 case I387_FLOOR:
13916 case I387_CEIL:
13917 return I387_CW_ANY;
13918 default:
13919 gcc_unreachable ();
13923 static int
13924 ix86_avx_u128_mode_exit (void)
13926 rtx reg = crtl->return_rtx;
13928 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
13929 or 512 bit modes used in the function return register. */
13930 if (reg && ix86_check_avx_upper_register (reg))
13931 return AVX_U128_DIRTY;
13933 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
13934 modes used in function arguments, otherwise return AVX_U128_CLEAN. */
13936 return ix86_avx_u128_mode_entry ();
13939 /* Return a mode that ENTITY is assumed to be
13940 switched to at function exit. */
13942 static int
13943 ix86_mode_exit (int entity)
13945 switch (entity)
13947 case X86_DIRFLAG:
13948 return X86_DIRFLAG_ANY;
13949 case AVX_U128:
13950 return ix86_avx_u128_mode_exit ();
13951 case I387_ROUNDEVEN:
13952 case I387_TRUNC:
13953 case I387_FLOOR:
13954 case I387_CEIL:
13955 return I387_CW_ANY;
13956 default:
13957 gcc_unreachable ();
13961 static int
13962 ix86_mode_priority (int, int n)
13964 return n;
13967 /* Output code to initialize control word copies used by trunc?f?i and
13968 rounding patterns. CURRENT_MODE is set to current control word,
13969 while NEW_MODE is set to new control word. */
13971 static void
13972 emit_i387_cw_initialization (int mode)
13974 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
13975 rtx new_mode;
13977 enum ix86_stack_slot slot;
13979 rtx reg = gen_reg_rtx (HImode);
13981 emit_insn (gen_x86_fnstcw_1 (stored_mode));
13982 emit_move_insn (reg, copy_rtx (stored_mode));
13984 switch (mode)
13986 case I387_CW_ROUNDEVEN:
13987 /* round to nearest */
13988 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13989 slot = SLOT_CW_ROUNDEVEN;
13990 break;
13992 case I387_CW_TRUNC:
13993 /* round toward zero (truncate) */
13994 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
13995 slot = SLOT_CW_TRUNC;
13996 break;
13998 case I387_CW_FLOOR:
13999 /* round down toward -oo */
14000 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14001 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14002 slot = SLOT_CW_FLOOR;
14003 break;
14005 case I387_CW_CEIL:
14006 /* round up toward +oo */
14007 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14008 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14009 slot = SLOT_CW_CEIL;
14010 break;
14012 default:
14013 gcc_unreachable ();
14016 gcc_assert (slot < MAX_386_STACK_LOCALS);
14018 new_mode = assign_386_stack_local (HImode, slot);
14019 emit_move_insn (new_mode, reg);
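
/* For reference (a sketch, not taken from the sources): the x87 rounding
   control lives in bits 10-11 of the control word (mask 0x0c00):

     00  round to nearest (even)	I387_CW_ROUNDEVEN  (and ~0x0c00)
     01  round down, toward -inf	I387_CW_FLOOR	   (or 0x0400)
     10  round up, toward +inf		I387_CW_CEIL	   (or 0x0800)
     11  round toward zero		I387_CW_TRUNC	   (or 0x0c00)

   so for I387_CW_TRUNC the RTL emitted above assembles to roughly

     fnstcw	-2(%ebp)
     movw	-2(%ebp), %ax
     orw	$0x0c00, %ax
     movw	%ax, -4(%ebp)

   The stack slots and scratch register shown stand in for SLOT_CW_STORED
   and SLOT_CW_TRUNC and were chosen only for the example.  */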
14022 /* Generate one or more insns to set ENTITY to MODE. */
14024 static void
14025 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
14026 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
14028 switch (entity)
14030 case X86_DIRFLAG:
14031 if (mode == X86_DIRFLAG_RESET)
14032 emit_insn (gen_cld ());
14033 break;
14034 case AVX_U128:
14035 if (mode == AVX_U128_CLEAN)
14036 emit_insn (gen_avx_vzeroupper ());
14037 break;
14038 case I387_ROUNDEVEN:
14039 case I387_TRUNC:
14040 case I387_FLOOR:
14041 case I387_CEIL:
14042 if (mode != I387_CW_ANY
14043 && mode != I387_CW_UNINITIALIZED)
14044 emit_i387_cw_initialization (mode);
14045 break;
14046 default:
14047 gcc_unreachable ();
14051 /* Output code for INSN to convert a float to a signed int. OPERANDS
14052 are the insn operands. The output may be [HSD]Imode and the input
14053 operand may be [SDX]Fmode. */
14055 const char *
14056 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
14058 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
14059 bool dimode_p = GET_MODE (operands[0]) == DImode;
14060 int round_mode = get_attr_i387_cw (insn);
14062 static char buf[40];
14063 const char *p;
14065 /* Jump through a hoop or two for DImode, since the hardware has no
14066 non-popping instruction. We used to do this a different way, but
14067 that was somewhat fragile and broke with post-reload splitters. */
14068 if ((dimode_p || fisttp) && !stack_top_dies)
14069 output_asm_insn ("fld\t%y1", operands);
14071 gcc_assert (STACK_TOP_P (operands[1]));
14072 gcc_assert (MEM_P (operands[0]));
14073 gcc_assert (GET_MODE (operands[1]) != TFmode);
14075 if (fisttp)
14076 return "fisttp%Z0\t%0";
14078 strcpy (buf, "fist");
14080 if (round_mode != I387_CW_ANY)
14081 output_asm_insn ("fldcw\t%3", operands);
14083 p = "p%Z0\t%0";
14084 strcat (buf, p + !(stack_top_dies || dimode_p));
14086 output_asm_insn (buf, operands);
14088 if (round_mode != I387_CW_ANY)
14089 output_asm_insn ("fldcw\t%2", operands);
14091 return "";
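
/* Illustrative output (a sketch, not from the sources): for a DImode
   conversion without fisttp, where the value must survive the mandatory
   pop, the templates above print roughly

     fld	%st(0)
     fldcw	%3		# switch to the truncating control word
     fistpll	%0		# store the 64-bit integer and pop
     fldcw	%2		# restore the original control word

   With SSE3 fisttp the whole dance collapses to "fisttpll %0".  Operand
   numbers follow the insn template; the concrete operands depend on the
   pattern in i386.md.  */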
14094 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14095 have the values zero or one, indicates the ffreep insn's operand
14096 from the OPERANDS array. */
14098 static const char *
14099 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14101 if (TARGET_USE_FFREEP)
14102 #ifdef HAVE_AS_IX86_FFREEP
14103 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14104 #else
14106 static char retval[32];
14107 int regno = REGNO (operands[opno]);
14109 gcc_assert (STACK_REGNO_P (regno));
14111 regno -= FIRST_STACK_REG;
14113 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14114 return retval;
14116 #endif
14118 return opno ? "fstp\t%y1" : "fstp\t%y0";
14122 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14123 should be used. UNORDERED_P is true when fucom should be used. */
14125 const char *
14126 output_fp_compare (rtx_insn *insn, rtx *operands,
14127 bool eflags_p, bool unordered_p)
14129 rtx *xops = eflags_p ? &operands[0] : &operands[1];
14130 bool stack_top_dies;
14132 static char buf[40];
14133 const char *p;
14135 gcc_assert (STACK_TOP_P (xops[0]));
14137 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
14139 if (eflags_p)
14141 p = unordered_p ? "fucomi" : "fcomi";
14142 strcpy (buf, p);
14144 p = "p\t{%y1, %0|%0, %y1}";
14145 strcat (buf, p + !stack_top_dies);
14147 return buf;
14150 if (STACK_REG_P (xops[1])
14151 && stack_top_dies
14152 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
14154 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
14156 /* If both the top of the 387 stack and the other operand, which
14157 is also a stack register, die, then this must be a
14158 `fcompp' float compare. */
14159 p = unordered_p ? "fucompp" : "fcompp";
14160 strcpy (buf, p);
14162 else if (const0_operand (xops[1], VOIDmode))
14164 gcc_assert (!unordered_p);
14165 strcpy (buf, "ftst");
14167 else
14169 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
14171 gcc_assert (!unordered_p);
14172 p = "ficom";
14174 else
14175 p = unordered_p ? "fucom" : "fcom";
14177 strcpy (buf, p);
14179 p = "p%Z2\t%y2";
14180 strcat (buf, p + !stack_top_dies);
14183 output_asm_insn (buf, operands);
14184 return "fnstsw\t%0";
14187 void
14188 ix86_output_addr_vec_elt (FILE *file, int value)
14190 const char *directive = ASM_LONG;
14192 #ifdef ASM_QUAD
14193 if (TARGET_LP64)
14194 directive = ASM_QUAD;
14195 #else
14196 gcc_assert (!TARGET_64BIT);
14197 #endif
14199 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14202 void
14203 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14205 const char *directive = ASM_LONG;
14207 #ifdef ASM_QUAD
14208 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14209 directive = ASM_QUAD;
14210 #else
14211 gcc_assert (!TARGET_64BIT);
14212 #endif
14213 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14214 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14215 fprintf (file, "%s%s%d-%s%d\n",
14216 directive, LPREFIX, value, LPREFIX, rel);
14217 #if TARGET_MACHO
14218 else if (TARGET_MACHO)
14220 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14221 machopic_output_function_base_name (file);
14222 putc ('\n', file);
14224 #endif
14225 else if (HAVE_AS_GOTOFF_IN_DATA)
14226 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14227 else
14228 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14229 GOT_SYMBOL_NAME, LPREFIX, value);
14232 #define LEA_MAX_STALL (3)
14233 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
14235 /* Increase given DISTANCE in half-cycles according to
14236 dependencies between PREV and NEXT instructions.
14237 Add 1 half-cycle if there is no dependency and
14238 go to the next cycle if there is some dependency. */
14240 static unsigned int
14241 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
14243 df_ref def, use;
14245 if (!prev || !next)
14246 return distance + (distance & 1) + 2;
14248 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
14249 return distance + 1;
14251 FOR_EACH_INSN_USE (use, next)
14252 FOR_EACH_INSN_DEF (def, prev)
14253 if (!DF_REF_IS_ARTIFICIAL (def)
14254 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
14255 return distance + (distance & 1) + 2;
14257 return distance + 1;
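
/* Worked example (illustrative, and an assumption about the intent rather
   than a statement from the sources): distances are counted in
   half-cycles, assuming a 2-wide in-order pipeline.  Independent
   neighbouring insns cost one half-cycle, so the distance goes
   0 -> 1 -> 2 -> ...; when NEXT truly depends on PREV the distance is
   rounded up to the next full cycle and a whole cycle is added, e.g. a
   distance of 3 becomes 3 + (3 & 1) + 2 = 6.  */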
14260 /* Function checks if instruction INSN defines register number
14261 REGNO1 or REGNO2. */
14263 bool
14264 insn_defines_reg (unsigned int regno1, unsigned int regno2,
14265 rtx_insn *insn)
14267 df_ref def;
14269 FOR_EACH_INSN_DEF (def, insn)
14270 if (DF_REF_REG_DEF_P (def)
14271 && !DF_REF_IS_ARTIFICIAL (def)
14272 && (regno1 == DF_REF_REGNO (def)
14273 || regno2 == DF_REF_REGNO (def)))
14274 return true;
14276 return false;
14279 /* Function checks if instruction INSN uses register number
14280 REGNO as a part of address expression. */
14282 static bool
14283 insn_uses_reg_mem (unsigned int regno, rtx insn)
14285 df_ref use;
14287 FOR_EACH_INSN_USE (use, insn)
14288 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
14289 return true;
14291 return false;
14294 /* Search backward for non-agu definition of register number REGNO1
14295 or register number REGNO2 in basic block starting from instruction
14296 START up to head of basic block or instruction INSN.
14298 Function puts true value into *FOUND var if definition was found
14299 and false otherwise.
14301 Distance in half-cycles between START and found instruction or head
14302 of BB is added to DISTANCE and returned. */
14304 static int
14305 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
14306 rtx_insn *insn, int distance,
14307 rtx_insn *start, bool *found)
14309 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
14310 rtx_insn *prev = start;
14311 rtx_insn *next = NULL;
14313 *found = false;
14315 while (prev
14316 && prev != insn
14317 && distance < LEA_SEARCH_THRESHOLD)
14319 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
14321 distance = increase_distance (prev, next, distance);
14322 if (insn_defines_reg (regno1, regno2, prev))
14324 if (recog_memoized (prev) < 0
14325 || get_attr_type (prev) != TYPE_LEA)
14327 *found = true;
14328 return distance;
14332 next = prev;
14334 if (prev == BB_HEAD (bb))
14335 break;
14337 prev = PREV_INSN (prev);
14340 return distance;
14343 /* Search backward for non-agu definition of register number REGNO1
14344 or register number REGNO2 in INSN's basic block until
14345 1. Pass LEA_SEARCH_THRESHOLD instructions, or
14346 2. Reach neighbor BBs boundary, or
14347 3. Reach agu definition.
14348 Returns the distance between the non-agu definition point and INSN.
14349 If no definition point, returns -1. */
14351 static int
14352 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
14353 rtx_insn *insn)
14355 basic_block bb = BLOCK_FOR_INSN (insn);
14356 int distance = 0;
14357 bool found = false;
14359 if (insn != BB_HEAD (bb))
14360 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
14361 distance, PREV_INSN (insn),
14362 &found);
14364 if (!found && distance < LEA_SEARCH_THRESHOLD)
14366 edge e;
14367 edge_iterator ei;
14368 bool simple_loop = false;
14370 FOR_EACH_EDGE (e, ei, bb->preds)
14371 if (e->src == bb)
14373 simple_loop = true;
14374 break;
14377 if (simple_loop)
14378 distance = distance_non_agu_define_in_bb (regno1, regno2,
14379 insn, distance,
14380 BB_END (bb), &found);
14381 else
14383 int shortest_dist = -1;
14384 bool found_in_bb = false;
14386 FOR_EACH_EDGE (e, ei, bb->preds)
14388 int bb_dist
14389 = distance_non_agu_define_in_bb (regno1, regno2,
14390 insn, distance,
14391 BB_END (e->src),
14392 &found_in_bb);
14393 if (found_in_bb)
14395 if (shortest_dist < 0)
14396 shortest_dist = bb_dist;
14397 else if (bb_dist > 0)
14398 shortest_dist = MIN (bb_dist, shortest_dist);
14400 found = true;
14404 distance = shortest_dist;
14408 /* get_attr_type may modify recog data. We want to make sure
14409 that recog data is valid for instruction INSN, on which
14410 distance_non_agu_define is called. INSN is unchanged here. */
14411 extract_insn_cached (insn);
14413 if (!found)
14414 return -1;
14416 return distance >> 1;
14419 /* Return the distance in half-cycles between INSN and the next
14420 insn that uses register number REGNO in a memory address, added
14421 to DISTANCE. Return -1 if REGNO is set.
14423 Put true value into *FOUND if register usage was found and
14424 false otherwise.
14425 Put true value into *REDEFINED if register redefinition was
14426 found and false otherwise. */
14428 static int
14429 distance_agu_use_in_bb (unsigned int regno,
14430 rtx_insn *insn, int distance, rtx_insn *start,
14431 bool *found, bool *redefined)
14433 basic_block bb = NULL;
14434 rtx_insn *next = start;
14435 rtx_insn *prev = NULL;
14437 *found = false;
14438 *redefined = false;
14440 if (start != NULL_RTX)
14442 bb = BLOCK_FOR_INSN (start);
14443 if (start != BB_HEAD (bb))
14444 /* If insn and start belong to the same bb, set prev to insn,
14445 so the call to increase_distance will increase the distance
14446 between insns by 1. */
14447 prev = insn;
14450 while (next
14451 && next != insn
14452 && distance < LEA_SEARCH_THRESHOLD)
14454 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
14456 distance = increase_distance(prev, next, distance);
14457 if (insn_uses_reg_mem (regno, next))
14459 /* Return DISTANCE if OP0 is used in memory
14460 address in NEXT. */
14461 *found = true;
14462 return distance;
14465 if (insn_defines_reg (regno, INVALID_REGNUM, next))
14467 /* Return -1 if OP0 is set in NEXT. */
14468 *redefined = true;
14469 return -1;
14472 prev = next;
14475 if (next == BB_END (bb))
14476 break;
14478 next = NEXT_INSN (next);
14481 return distance;
14484 /* Return the distance between INSN and the next insn that uses
14485 register number REGNO0 in memory address. Return -1 if no such
14486 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
14488 static int
14489 distance_agu_use (unsigned int regno0, rtx_insn *insn)
14491 basic_block bb = BLOCK_FOR_INSN (insn);
14492 int distance = 0;
14493 bool found = false;
14494 bool redefined = false;
14496 if (insn != BB_END (bb))
14497 distance = distance_agu_use_in_bb (regno0, insn, distance,
14498 NEXT_INSN (insn),
14499 &found, &redefined);
14501 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
14503 edge e;
14504 edge_iterator ei;
14505 bool simple_loop = false;
14507 FOR_EACH_EDGE (e, ei, bb->succs)
14508 if (e->dest == bb)
14510 simple_loop = true;
14511 break;
14514 if (simple_loop)
14515 distance = distance_agu_use_in_bb (regno0, insn,
14516 distance, BB_HEAD (bb),
14517 &found, &redefined);
14518 else
14520 int shortest_dist = -1;
14521 bool found_in_bb = false;
14522 bool redefined_in_bb = false;
14524 FOR_EACH_EDGE (e, ei, bb->succs)
14526 int bb_dist
14527 = distance_agu_use_in_bb (regno0, insn,
14528 distance, BB_HEAD (e->dest),
14529 &found_in_bb, &redefined_in_bb);
14530 if (found_in_bb)
14532 if (shortest_dist < 0)
14533 shortest_dist = bb_dist;
14534 else if (bb_dist > 0)
14535 shortest_dist = MIN (bb_dist, shortest_dist);
14537 found = true;
14541 distance = shortest_dist;
14545 if (!found || redefined)
14546 return -1;
14548 return distance >> 1;
14551 /* Define this macro to tune LEA priority vs ADD; it takes effect when
14552 there is a dilemma of choosing LEA or ADD.
14553 Negative value: ADD is more preferred than LEA
14554 Zero: Neutral
14555 Positive value: LEA is more preferred than ADD. */
14556 #define IX86_LEA_PRIORITY 0
14558 /* Return true if using the lea INSN has a performance advantage
14559 over a sequence of instructions. The instruction sequence has
14560 SPLIT_COST cycles higher latency than the lea latency. */
14562 static bool
14563 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
14564 unsigned int regno2, int split_cost, bool has_scale)
14566 int dist_define, dist_use;
14568 /* For Atom processors newer than Bonnell, if using a 2-source or
14569 3-source LEA for non-destructive destination purposes, or due to
14570 wanting ability to use SCALE, the use of LEA is justified. */
14571 if (!TARGET_BONNELL)
14573 if (has_scale)
14574 return true;
14575 if (split_cost < 1)
14576 return false;
14577 if (regno0 == regno1 || regno0 == regno2)
14578 return false;
14579 return true;
14582 rtx_insn *rinsn = recog_data.insn;
14584 dist_define = distance_non_agu_define (regno1, regno2, insn);
14585 dist_use = distance_agu_use (regno0, insn);
14587 /* distance_non_agu_define can call extract_insn_cached. If this function
14588 is called from define_split conditions, that can break insn splitting,
14589 because split_insns works by clearing recog_data.insn and then modifying
14590 the recog_data.operand array to match the various split conditions. */
14591 if (recog_data.insn != rinsn)
14592 recog_data.insn = NULL;
14594 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
14596 /* If there is no non-AGU operand definition, no AGU
14597 operand usage and the split cost is 0, then both the lea
14598 and non-lea variants have the same priority. Currently
14599 we prefer lea for 64-bit code and non-lea for 32-bit
14600 code. */
14601 if (dist_use < 0 && split_cost == 0)
14602 return TARGET_64BIT || IX86_LEA_PRIORITY;
14603 else
14604 return true;
14607 /* The longer the distance to the definition, the more preferable
14608 lea is. Here we adjust it to take into account the splitting cost
14609 and lea priority. */
14610 dist_define += split_cost + IX86_LEA_PRIORITY;
14612 /* If there is no use in a memory address then we just check
14613 that the split cost exceeds the AGU stall. */
14614 if (dist_use < 0)
14615 return dist_define > LEA_MAX_STALL;
14617 /* If this insn has both backward non-agu dependence and forward
14618 agu dependence, the one with short distance takes effect. */
14619 return dist_define >= dist_use;
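
/* Worked example of the Bonnell heuristic above (illustrative only, with
   IX86_LEA_PRIORITY 0): if the lea's inputs were defined 2 half-cycles
   earlier (dist_define = 2, so the AGU would stall on them) and the
   result is next used in an address 4 half-cycles later (dist_use = 4),
   then with split_cost 0 we get 2 >= 4 == false and the lea is split;
   if instead the address use follows immediately (dist_use = 1), the
   forward AGU dependence dominates and the lea is kept.  */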
14622 /* Return true if it is legal to clobber flags by INSN and
14623 false otherwise. */
14625 static bool
14626 ix86_ok_to_clobber_flags (rtx_insn *insn)
14628 basic_block bb = BLOCK_FOR_INSN (insn);
14629 df_ref use;
14630 bitmap live;
14632 while (insn)
14634 if (NONDEBUG_INSN_P (insn))
14636 FOR_EACH_INSN_USE (use, insn)
14637 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
14638 return false;
14640 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
14641 return true;
14644 if (insn == BB_END (bb))
14645 break;
14647 insn = NEXT_INSN (insn);
14650 live = df_get_live_out(bb);
14651 return !REGNO_REG_SET_P (live, FLAGS_REG);
14654 /* Return true if we need to split op0 = op1 + op2 into a sequence of
14655 move and add to avoid AGU stalls. */
14657 bool
14658 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
14660 unsigned int regno0, regno1, regno2;
14662 /* Check if we need to optimize. */
14663 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14664 return false;
14666 /* Check it is correct to split here. */
14667 if (!ix86_ok_to_clobber_flags(insn))
14668 return false;
14670 regno0 = true_regnum (operands[0]);
14671 regno1 = true_regnum (operands[1]);
14672 regno2 = true_regnum (operands[2]);
14674 /* We need to split only adds with non destructive
14675 destination operand. */
14676 if (regno0 == regno1 || regno0 == regno2)
14677 return false;
14678 else
14679 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
14682 /* Return true if we should emit lea instruction instead of mov
14683 instruction. */
14685 bool
14686 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
14688 unsigned int regno0, regno1;
14690 /* Check if we need to optimize. */
14691 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14692 return false;
14694 /* Use lea for reg to reg moves only. */
14695 if (!REG_P (operands[0]) || !REG_P (operands[1]))
14696 return false;
14698 regno0 = true_regnum (operands[0]);
14699 regno1 = true_regnum (operands[1]);
14701 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
14704 /* Return true if we need to split lea into a sequence of
14705 instructions to avoid AGU stalls. */
14707 bool
14708 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
14710 unsigned int regno0, regno1, regno2;
14711 int split_cost;
14712 struct ix86_address parts;
14713 int ok;
14715 /* The "at least two components" test below might not catch simple
14716 move or zero extension insns if parts.base is non-NULL and parts.disp
14717 is const0_rtx as the only components in the address, e.g. if the
14718 register is %rbp or %r13. As this test is much cheaper and moves or
14719 zero extensions are the common case, do this check first. */
14720 if (REG_P (operands[1])
14721 || (SImode_address_operand (operands[1], VOIDmode)
14722 && REG_P (XEXP (operands[1], 0))))
14723 return false;
14725 /* Check if it is OK to split here. */
14726 if (!ix86_ok_to_clobber_flags (insn))
14727 return false;
14729 ok = ix86_decompose_address (operands[1], &parts);
14730 gcc_assert (ok);
14732 /* There should be at least two components in the address. */
14733 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
14734 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
14735 return false;
14737 /* We should not split into add if a non-legitimate PIC
14738 operand is used as the displacement. */
14739 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
14740 return false;
14742 regno0 = true_regnum (operands[0]);
14743 regno1 = INVALID_REGNUM;
14744 regno2 = INVALID_REGNUM;
14746 if (parts.base)
14747 regno1 = true_regnum (parts.base);
14748 if (parts.index)
14749 regno2 = true_regnum (parts.index);
14751 /* Use add for a = a + b and a = b + a since it is faster and shorter
14752 than lea for most processors. For the processors like BONNELL, if
14753 the destination register of LEA holds an actual address which will
14754 be used soon, LEA is better and otherwise ADD is better. */
14755 if (!TARGET_BONNELL
14756 && parts.scale == 1
14757 && (!parts.disp || parts.disp == const0_rtx)
14758 && (regno0 == regno1 || regno0 == regno2))
14759 return true;
14761 /* Check we need to optimize. */
14762 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
14763 return false;
14765 split_cost = 0;
14767 /* Compute how many cycles we will add to execution time
14768 if split lea into a sequence of instructions. */
14769 if (parts.base || parts.index)
14771 /* Have to use a mov instruction if the non-destructive
14772 destination form is used. */
14773 if (regno1 != regno0 && regno2 != regno0)
14774 split_cost += 1;
14776 /* Have to add index to base if both exist. */
14777 if (parts.base && parts.index)
14778 split_cost += 1;
14780 /* Have to use shift and adds if scale is 2 or greater. */
14781 if (parts.scale > 1)
14783 if (regno0 != regno1)
14784 split_cost += 1;
14785 else if (regno2 == regno0)
14786 split_cost += 4;
14787 else
14788 split_cost += parts.scale;
14791 /* Have to use an add instruction with an immediate if
14792 disp is nonzero. */
14793 if (parts.disp && parts.disp != const0_rtx)
14794 split_cost += 1;
14796 /* Subtract the price of lea. */
14797 split_cost -= 1;
14800 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
14801 parts.scale > 1);
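
/* Illustrative sketch (not part of the sources): a 3-component lea such as

     lea	0x10(%rbx,%rcx,4), %rax

   can be replaced, at the split_cost accounted above, by something like

     mov	%rcx, %rax	# non-destructive destination needs a mov
     shl	$2, %rax	# scale > 1 needs a shift
     add	%rbx, %rax	# add the base
     add	$0x10, %rax	# add the displacement

   whether that is profitable is exactly what ix86_lea_outperforms
   decides.  The concrete replacement is produced by the splitters in
   i386.md, not here; register choices are made up for the example.  */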
14804 /* Return true if it is ok to optimize an ADD operation to LEA
14805 operation to avoid flag register consumption. For most processors,
14806 ADD is faster than LEA. For the processors like BONNELL, if the
14807 destination register of LEA holds an actual address which will be
14808 used soon, LEA is better and otherwise ADD is better. */
14810 bool
14811 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
14813 unsigned int regno0 = true_regnum (operands[0]);
14814 unsigned int regno1 = true_regnum (operands[1]);
14815 unsigned int regno2 = true_regnum (operands[2]);
14817 /* If a = b + c, (a!=b && a!=c), must use lea form. */
14818 if (regno0 != regno1 && regno0 != regno2)
14819 return true;
14821 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14822 return false;
14824 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
14827 /* Return true if destination reg of SET_BODY is shift count of
14828 USE_BODY. */
14830 static bool
14831 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
14833 rtx set_dest;
14834 rtx shift_rtx;
14835 int i;
14837 /* Retrieve destination of SET_BODY. */
14838 switch (GET_CODE (set_body))
14840 case SET:
14841 set_dest = SET_DEST (set_body);
14842 if (!set_dest || !REG_P (set_dest))
14843 return false;
14844 break;
14845 case PARALLEL:
14846 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
14847 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
14848 use_body))
14849 return true;
14850 /* FALLTHROUGH */
14851 default:
14852 return false;
14855 /* Retrieve shift count of USE_BODY. */
14856 switch (GET_CODE (use_body))
14858 case SET:
14859 shift_rtx = XEXP (use_body, 1);
14860 break;
14861 case PARALLEL:
14862 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
14863 if (ix86_dep_by_shift_count_body (set_body,
14864 XVECEXP (use_body, 0, i)))
14865 return true;
14866 /* FALLTHROUGH */
14867 default:
14868 return false;
14871 if (shift_rtx
14872 && (GET_CODE (shift_rtx) == ASHIFT
14873 || GET_CODE (shift_rtx) == LSHIFTRT
14874 || GET_CODE (shift_rtx) == ASHIFTRT
14875 || GET_CODE (shift_rtx) == ROTATE
14876 || GET_CODE (shift_rtx) == ROTATERT))
14878 rtx shift_count = XEXP (shift_rtx, 1);
14880 /* Return true if shift count is dest of SET_BODY. */
14881 if (REG_P (shift_count))
14883 /* Add check since it can be invoked before register
14884 allocation in pre-reload schedule. */
14885 if (reload_completed
14886 && true_regnum (set_dest) == true_regnum (shift_count))
14887 return true;
14888 else if (REGNO(set_dest) == REGNO(shift_count))
14889 return true;
14893 return false;
14896 /* Return true if destination reg of SET_INSN is shift count of
14897 USE_INSN. */
14899 bool
14900 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
14902 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
14903 PATTERN (use_insn));
14906 /* Return TRUE or FALSE depending on whether the unary operator meets the
14907 appropriate constraints. */
14909 bool
14910 ix86_unary_operator_ok (enum rtx_code,
14911 machine_mode,
14912 rtx operands[2])
14914 /* If one of operands is memory, source and destination must match. */
14915 if ((MEM_P (operands[0])
14916 || MEM_P (operands[1]))
14917 && ! rtx_equal_p (operands[0], operands[1]))
14918 return false;
14919 return true;
14922 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
14923 are ok, keeping in mind the possible movddup alternative. */
14925 bool
14926 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
14928 if (MEM_P (operands[0]))
14929 return rtx_equal_p (operands[0], operands[1 + high]);
14930 if (MEM_P (operands[1]) && MEM_P (operands[2]))
14931 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
14932 return true;
14935 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
14936 then replicate the value for all elements of the vector
14937 register. */
14940 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
14942 int i, n_elt;
14943 rtvec v;
14944 machine_mode scalar_mode;
14946 switch (mode)
14948 case E_V64QImode:
14949 case E_V32QImode:
14950 case E_V16QImode:
14951 case E_V32HImode:
14952 case E_V16HImode:
14953 case E_V8HImode:
14954 case E_V16SImode:
14955 case E_V8SImode:
14956 case E_V4SImode:
14957 case E_V8DImode:
14958 case E_V4DImode:
14959 case E_V2DImode:
14960 gcc_assert (vect);
14961 /* FALLTHRU */
14962 case E_V16SFmode:
14963 case E_V8SFmode:
14964 case E_V4SFmode:
14965 case E_V2SFmode:
14966 case E_V8DFmode:
14967 case E_V4DFmode:
14968 case E_V2DFmode:
14969 n_elt = GET_MODE_NUNITS (mode);
14970 v = rtvec_alloc (n_elt);
14971 scalar_mode = GET_MODE_INNER (mode);
14973 RTVEC_ELT (v, 0) = value;
14975 for (i = 1; i < n_elt; ++i)
14976 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
14978 return gen_rtx_CONST_VECTOR (mode, v);
14980 default:
14981 gcc_unreachable ();
14985 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
14986 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
14987 for an SSE register. If VECT is true, then replicate the mask for
14988 all elements of the vector register. If INVERT is true, then create
14989 a mask excluding the sign bit. */
14992 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
14994 machine_mode vec_mode, imode;
14995 wide_int w;
14996 rtx mask, v;
14998 switch (mode)
15000 case E_V16SImode:
15001 case E_V16SFmode:
15002 case E_V8SImode:
15003 case E_V4SImode:
15004 case E_V8SFmode:
15005 case E_V4SFmode:
15006 case E_V2SFmode:
15007 vec_mode = mode;
15008 imode = SImode;
15009 break;
15011 case E_V8DImode:
15012 case E_V4DImode:
15013 case E_V2DImode:
15014 case E_V8DFmode:
15015 case E_V4DFmode:
15016 case E_V2DFmode:
15017 vec_mode = mode;
15018 imode = DImode;
15019 break;
15021 case E_TImode:
15022 case E_TFmode:
15023 vec_mode = VOIDmode;
15024 imode = TImode;
15025 break;
15027 default:
15028 gcc_unreachable ();
15031 machine_mode inner_mode = GET_MODE_INNER (mode);
15032 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
15033 GET_MODE_BITSIZE (inner_mode));
15034 if (invert)
15035 w = wi::bit_not (w);
15037 /* Force this value into the low part of a fp vector constant. */
15038 mask = immed_wide_int_const (w, imode);
15039 mask = gen_lowpart (inner_mode, mask);
15041 if (vec_mode == VOIDmode)
15042 return force_reg (inner_mode, mask);
15044 v = ix86_build_const_vector (vec_mode, vect, mask);
15045 return force_reg (vec_mode, v);
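
/* Illustrative examples (not from the sources): for V4SFmode with
   VECT true the mask built above is the vector constant
   { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } reinterpreted as
   floats, suitable for andps/xorps based abs/neg sequences; with INVERT
   true each element becomes 0x7fffffff instead, i.e. everything but the
   sign bit.  With VECT false only element 0 carries the mask and the
   remaining elements are zero.  */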
15048 /* Return TRUE or FALSE depending on whether the first SET in INSN
15049 has source and destination with matching CC modes, and that the
15050 CC mode is at least as constrained as REQ_MODE. */
15052 bool
15053 ix86_match_ccmode (rtx insn, machine_mode req_mode)
15055 rtx set;
15056 machine_mode set_mode;
15058 set = PATTERN (insn);
15059 if (GET_CODE (set) == PARALLEL)
15060 set = XVECEXP (set, 0, 0);
15061 gcc_assert (GET_CODE (set) == SET);
15062 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
15064 set_mode = GET_MODE (SET_DEST (set));
15065 switch (set_mode)
15067 case E_CCNOmode:
15068 if (req_mode != CCNOmode
15069 && (req_mode != CCmode
15070 || XEXP (SET_SRC (set), 1) != const0_rtx))
15071 return false;
15072 break;
15073 case E_CCmode:
15074 if (req_mode == CCGCmode)
15075 return false;
15076 /* FALLTHRU */
15077 case E_CCGCmode:
15078 if (req_mode == CCGOCmode || req_mode == CCNOmode)
15079 return false;
15080 /* FALLTHRU */
15081 case E_CCGOCmode:
15082 if (req_mode == CCZmode)
15083 return false;
15084 /* FALLTHRU */
15085 case E_CCZmode:
15086 break;
15088 case E_CCGZmode:
15090 case E_CCAmode:
15091 case E_CCCmode:
15092 case E_CCOmode:
15093 case E_CCPmode:
15094 case E_CCSmode:
15095 if (set_mode != req_mode)
15096 return false;
15097 break;
15099 default:
15100 gcc_unreachable ();
15103 return GET_MODE (SET_SRC (set)) == set_mode;
15106 machine_mode
15107 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
15109 machine_mode mode = GET_MODE (op0);
15111 if (SCALAR_FLOAT_MODE_P (mode))
15113 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15114 return CCFPmode;
15117 switch (code)
15119 /* Only zero flag is needed. */
15120 case EQ: /* ZF=0 */
15121 case NE: /* ZF!=0 */
15122 return CCZmode;
15123 /* Codes needing carry flag. */
15124 case GEU: /* CF=0 */
15125 case LTU: /* CF=1 */
15126 /* Detect overflow checks. They need just the carry flag. */
15127 if (GET_CODE (op0) == PLUS
15128 && (rtx_equal_p (op1, XEXP (op0, 0))
15129 || rtx_equal_p (op1, XEXP (op0, 1))))
15130 return CCCmode;
15131 else
15132 return CCmode;
15133 case GTU: /* CF=0 & ZF=0 */
15134 case LEU: /* CF=1 | ZF=1 */
15135 return CCmode;
15136 /* Codes possibly doable only with sign flag when
15137 comparing against zero. */
15138 case GE: /* SF=OF or SF=0 */
15139 case LT: /* SF<>OF or SF=1 */
15140 if (op1 == const0_rtx)
15141 return CCGOCmode;
15142 else
15143 /* For other cases Carry flag is not required. */
15144 return CCGCmode;
15145 /* Codes doable only with the sign flag when comparing
15146 against zero, but we lack a jump instruction for it,
15147 so we need to use relational tests against the overflow
15148 flag, which thus needs to be zero. */
15149 case GT: /* ZF=0 & SF=OF */
15150 case LE: /* ZF=1 | SF<>OF */
15151 if (op1 == const0_rtx)
15152 return CCNOmode;
15153 else
15154 return CCGCmode;
15155 /* The strcmp pattern does (use flags) and combine may ask us for the
15156 proper mode. */
15157 case USE:
15158 return CCmode;
15159 default:
15160 gcc_unreachable ();
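
/* Examples of the mapping above (illustrative only): a plain equality
   test "a == b" only needs ZF and gets CCZmode; an unsigned "a < b"
   needs CF and gets CCmode, or CCCmode when the comparison is really an
   overflow check of the form "a + b < b"; a signed "a < 0" only needs
   SF/OF agreement against zero and gets CCGOCmode.  */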
15164 /* Return the fixed registers used for condition codes. */
15166 static bool
15167 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
15169 *p1 = FLAGS_REG;
15170 *p2 = INVALID_REGNUM;
15171 return true;
15174 /* If two condition code modes are compatible, return a condition code
15175 mode which is compatible with both. Otherwise, return
15176 VOIDmode. */
15178 static machine_mode
15179 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
15181 if (m1 == m2)
15182 return m1;
15184 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
15185 return VOIDmode;
15187 if ((m1 == CCGCmode && m2 == CCGOCmode)
15188 || (m1 == CCGOCmode && m2 == CCGCmode))
15189 return CCGCmode;
15191 if ((m1 == CCNOmode && m2 == CCGOCmode)
15192 || (m1 == CCGOCmode && m2 == CCNOmode))
15193 return CCNOmode;
15195 if (m1 == CCZmode
15196 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
15197 return m2;
15198 else if (m2 == CCZmode
15199 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
15200 return m1;
15202 switch (m1)
15204 default:
15205 gcc_unreachable ();
15207 case E_CCmode:
15208 case E_CCGCmode:
15209 case E_CCGOCmode:
15210 case E_CCNOmode:
15211 case E_CCAmode:
15212 case E_CCCmode:
15213 case E_CCOmode:
15214 case E_CCPmode:
15215 case E_CCSmode:
15216 case E_CCZmode:
15217 switch (m2)
15219 default:
15220 return VOIDmode;
15222 case E_CCmode:
15223 case E_CCGCmode:
15224 case E_CCGOCmode:
15225 case E_CCNOmode:
15226 case E_CCAmode:
15227 case E_CCCmode:
15228 case E_CCOmode:
15229 case E_CCPmode:
15230 case E_CCSmode:
15231 case E_CCZmode:
15232 return CCmode;
15235 case E_CCFPmode:
15236 /* These are only compatible with themselves, which we already
15237 checked above. */
15238 return VOIDmode;
15242 /* Return the strategy to use for floating-point comparison. We assume that
15243 fcomi is always preferable where available, since that is also true when looking
15244 at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
15246 enum ix86_fpcmp_strategy
15247 ix86_fp_comparison_strategy (enum rtx_code)
15249 /* Do fcomi/sahf based test when profitable. */
15251 if (TARGET_CMOVE)
15252 return IX86_FPCMP_COMI;
15254 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
15255 return IX86_FPCMP_SAHF;
15257 return IX86_FPCMP_ARITH;
15260 /* Convert comparison codes we use to represent FP comparison to integer
15261 code that will result in proper branch. Return UNKNOWN if no such code
15262 is available. */
15264 enum rtx_code
15265 ix86_fp_compare_code_to_integer (enum rtx_code code)
15267 switch (code)
15269 case GT:
15270 return GTU;
15271 case GE:
15272 return GEU;
15273 case ORDERED:
15274 case UNORDERED:
15275 return code;
15276 case UNEQ:
15277 return EQ;
15278 case UNLT:
15279 return LTU;
15280 case UNLE:
15281 return LEU;
15282 case LTGT:
15283 return NE;
15284 default:
15285 return UNKNOWN;
15289 /* Zero extend possibly SImode EXP to Pmode register. */
15291 ix86_zero_extend_to_Pmode (rtx exp)
15293 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
15296 /* Return true if the function being called was marked with attribute
15297 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
15298 to handle the non-PIC case in the backend because there is no easy
15299 interface for the front-end to force non-PLT calls to use the GOT.
15300 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
15301 to call the function marked "noplt" indirectly. */
15303 static bool
15304 ix86_nopic_noplt_attribute_p (rtx call_op)
15306 if (flag_pic || ix86_cmodel == CM_LARGE
15307 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
15308 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
15309 || SYMBOL_REF_LOCAL_P (call_op))
15310 return false;
15312 tree symbol_decl = SYMBOL_REF_DECL (call_op);
15314 if (!flag_plt
15315 || (symbol_decl != NULL_TREE
15316 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
15317 return true;
15319 return false;
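
/* Illustrative use (not part of the build): with -fno-plt, or with

     extern void bar (void) __attribute__ ((noplt));

   a non-PIC 64-bit call to bar is emitted through the GOT, e.g. as
   "call *bar@GOTPCREL(%rip)", instead of going through the PLT; the
   function name here is a placeholder for the example.  */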
15322 /* Helper to output the jmp/call. */
15323 static void
15324 ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
15326 if (thunk_name != NULL)
15328 fprintf (asm_out_file, "\tjmp\t");
15329 assemble_name (asm_out_file, thunk_name);
15330 putc ('\n', asm_out_file);
15332 else
15333 output_indirect_thunk (regno);
15336 /* Output indirect branch via a call and return thunk. CALL_OP is a
15337 register which contains the branch target. XASM is the assembly
15338 template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
15339 A normal call is converted to:
15341 call __x86_indirect_thunk_reg
15343 and a tail call is converted to:
15345 jmp __x86_indirect_thunk_reg
15348 static void
15349 ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
15351 char thunk_name_buf[32];
15352 char *thunk_name;
15353 enum indirect_thunk_prefix need_prefix
15354 = indirect_thunk_need_prefix (current_output_insn);
15355 int regno = REGNO (call_op);
15357 if (cfun->machine->indirect_branch_type
15358 != indirect_branch_thunk_inline)
15360 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
15362 int i = regno;
15363 if (i >= FIRST_REX_INT_REG)
15364 i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1);
15365 indirect_thunks_used |= 1 << i;
15367 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
15368 thunk_name = thunk_name_buf;
15370 else
15371 thunk_name = NULL;
15373 if (sibcall_p)
15374 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15375 else
15377 if (thunk_name != NULL)
15379 fprintf (asm_out_file, "\tcall\t");
15380 assemble_name (asm_out_file, thunk_name);
15381 putc ('\n', asm_out_file);
15382 return;
15385 char indirectlabel1[32];
15386 char indirectlabel2[32];
15388 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
15389 INDIRECT_LABEL,
15390 indirectlabelno++);
15391 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
15392 INDIRECT_LABEL,
15393 indirectlabelno++);
15395 /* Jump. */
15396 fputs ("\tjmp\t", asm_out_file);
15397 assemble_name_raw (asm_out_file, indirectlabel2);
15398 fputc ('\n', asm_out_file);
15400 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
15402 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15404 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
15406 /* Call. */
15407 fputs ("\tcall\t", asm_out_file);
15408 assemble_name_raw (asm_out_file, indirectlabel1);
15409 fputc ('\n', asm_out_file);
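
/* Concrete example (illustrative): with -mindirect-branch=thunk an
   indirect call through %rax, "call *%rax", is emitted as

     call	__x86_indirect_thunk_rax

   while the thunk-inline variant for a normal (non-sibling) call uses
   the two local labels generated above to push the return address and
   only then jump through the register.  */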
15413 /* Output indirect branch via a call and return thunk. CALL_OP is
15414 the branch target. XASM is the assembly template for CALL_OP.
15415 Branch is a tail call if SIBCALL_P is true. A normal call is
15416 converted to:
15418 jmp L2
15419 L1:
15420 push CALL_OP
15421 jmp __x86_indirect_thunk
15422 L2:
15423 call L1
15425 and a tail call is converted to:
15427 push CALL_OP
15428 jmp __x86_indirect_thunk
15431 static void
15432 ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
15433 bool sibcall_p)
15435 char thunk_name_buf[32];
15436 char *thunk_name;
15437 char push_buf[64];
15438 enum indirect_thunk_prefix need_prefix
15439 = indirect_thunk_need_prefix (current_output_insn);
15440 int regno = -1;
15442 if (cfun->machine->indirect_branch_type
15443 != indirect_branch_thunk_inline)
15445 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
15446 indirect_thunk_needed = true;
15447 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
15448 thunk_name = thunk_name_buf;
15450 else
15451 thunk_name = NULL;
15453 snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
15454 TARGET_64BIT ? 'q' : 'l', xasm);
15456 if (sibcall_p)
15458 output_asm_insn (push_buf, &call_op);
15459 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15461 else
15463 char indirectlabel1[32];
15464 char indirectlabel2[32];
15466 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
15467 INDIRECT_LABEL,
15468 indirectlabelno++);
15469 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
15470 INDIRECT_LABEL,
15471 indirectlabelno++);
15473 /* Jump. */
15474 fputs ("\tjmp\t", asm_out_file);
15475 assemble_name_raw (asm_out_file, indirectlabel2);
15476 fputc ('\n', asm_out_file);
15478 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
15480 /* An external function may be called via GOT, instead of PLT. */
15481 if (MEM_P (call_op))
15483 struct ix86_address parts;
15484 rtx addr = XEXP (call_op, 0);
15485 if (ix86_decompose_address (addr, &parts)
15486 && parts.base == stack_pointer_rtx)
15488 /* Since call will adjust stack by -UNITS_PER_WORD,
15489 we must convert "disp(stack, index, scale)" to
15490 "disp+UNITS_PER_WORD(stack, index, scale)". */
15491 if (parts.index)
15493 addr = gen_rtx_MULT (Pmode, parts.index,
15494 GEN_INT (parts.scale));
15495 addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15496 addr);
15498 else
15499 addr = stack_pointer_rtx;
15501 rtx disp;
15502 if (parts.disp != NULL_RTX)
15503 disp = plus_constant (Pmode, parts.disp,
15504 UNITS_PER_WORD);
15505 else
15506 disp = GEN_INT (UNITS_PER_WORD);
15508 addr = gen_rtx_PLUS (Pmode, addr, disp);
15509 call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
15513 output_asm_insn (push_buf, &call_op);
15515 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15517 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
15519 /* Call. */
15520 fputs ("\tcall\t", asm_out_file);
15521 assemble_name_raw (asm_out_file, indirectlabel1);
15522 fputc ('\n', asm_out_file);
15526 /* Output indirect branch via a call and return thunk. CALL_OP is
15527 the branch target. XASM is the assembly template for CALL_OP.
15528 Branch is a tail call if SIBCALL_P is true. */
15530 static void
15531 ix86_output_indirect_branch (rtx call_op, const char *xasm,
15532 bool sibcall_p)
15534 if (REG_P (call_op))
15535 ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
15536 else
15537 ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
15540 /* Output indirect jump. CALL_OP is the jump target. */
15542 const char *
15543 ix86_output_indirect_jmp (rtx call_op)
15545 if (cfun->machine->indirect_branch_type != indirect_branch_keep)
15547 /* We can't have a red zone since "call" in the indirect thunk
15548 pushes the return address onto the stack, destroying the red zone. */
15549 if (ix86_red_zone_size != 0)
15550 gcc_unreachable ();
15552 ix86_output_indirect_branch (call_op, "%0", true);
15553 return "";
15555 else
15556 return "%!jmp\t%A0";
15559 /* Output return instrumentation for current function if needed. */
15561 static void
15562 output_return_instrumentation (void)
15564 if (ix86_instrument_return != instrument_return_none
15565 && flag_fentry
15566 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
15568 if (ix86_flag_record_return)
15569 fprintf (asm_out_file, "1:\n");
15570 switch (ix86_instrument_return)
15572 case instrument_return_call:
15573 fprintf (asm_out_file, "\tcall\t__return__\n");
15574 break;
15575 case instrument_return_nop5:
15576 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
15577 fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
15578 break;
15579 case instrument_return_none:
15580 break;
15583 if (ix86_flag_record_return)
15585 fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
15586 fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
15587 fprintf (asm_out_file, "\t.previous\n");
15592 /* Output function return. CALL_OP is the jump target. Add a REP
15593 prefix to RET if LONG_P is true and function return is kept. */
15595 const char *
15596 ix86_output_function_return (bool long_p)
15598 output_return_instrumentation ();
15600 if (cfun->machine->function_return_type != indirect_branch_keep)
15602 char thunk_name[32];
15603 enum indirect_thunk_prefix need_prefix
15604 = indirect_thunk_need_prefix (current_output_insn);
15606 if (cfun->machine->function_return_type
15607 != indirect_branch_thunk_inline)
15609 bool need_thunk = (cfun->machine->function_return_type
15610 == indirect_branch_thunk);
15611 indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
15612 true);
15613 indirect_return_needed |= need_thunk;
15614 fprintf (asm_out_file, "\tjmp\t");
15615 assemble_name (asm_out_file, thunk_name);
15616 putc ('\n', asm_out_file);
15618 else
15619 output_indirect_thunk (INVALID_REGNUM);
15621 return "";
15624 if (!long_p)
15625 return "%!ret";
15627 return "rep%; ret";
15630 /* Output indirect function return. RET_OP is the function return
15631 target. */
15633 const char *
15634 ix86_output_indirect_function_return (rtx ret_op)
15636 if (cfun->machine->function_return_type != indirect_branch_keep)
15638 char thunk_name[32];
15639 enum indirect_thunk_prefix need_prefix
15640 = indirect_thunk_need_prefix (current_output_insn);
15641 unsigned int regno = REGNO (ret_op);
15642 gcc_assert (regno == CX_REG);
15644 if (cfun->machine->function_return_type
15645 != indirect_branch_thunk_inline)
15647 bool need_thunk = (cfun->machine->function_return_type
15648 == indirect_branch_thunk);
15649 indirect_thunk_name (thunk_name, regno, need_prefix, true);
15651 if (need_thunk)
15653 indirect_return_via_cx = true;
15654 indirect_thunks_used |= 1 << CX_REG;
15656 fprintf (asm_out_file, "\tjmp\t");
15657 assemble_name (asm_out_file, thunk_name);
15658 putc ('\n', asm_out_file);
15660 else
15661 output_indirect_thunk (regno);
15663 return "";
15665 else
15666 return "%!jmp\t%A0";
15669 /* Output the assembly for a call instruction. */
15671 const char *
15672 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
15674 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
15675 bool output_indirect_p
15676 = (!TARGET_SEH
15677 && cfun->machine->indirect_branch_type != indirect_branch_keep);
15678 bool seh_nop_p = false;
15679 const char *xasm;
15681 if (SIBLING_CALL_P (insn))
15683 output_return_instrumentation ();
15684 if (direct_p)
15686 if (ix86_nopic_noplt_attribute_p (call_op))
15688 direct_p = false;
15689 if (TARGET_64BIT)
15691 if (output_indirect_p)
15692 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15693 else
15694 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15696 else
15698 if (output_indirect_p)
15699 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
15700 else
15701 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
15704 else
15705 xasm = "%!jmp\t%P0";
15707 /* SEH epilogue detection requires the indirect branch case
15708 to include REX.W. */
15709 else if (TARGET_SEH)
15710 xasm = "%!rex.W jmp\t%A0";
15711 else
15713 if (output_indirect_p)
15714 xasm = "%0";
15715 else
15716 xasm = "%!jmp\t%A0";
15719 if (output_indirect_p && !direct_p)
15720 ix86_output_indirect_branch (call_op, xasm, true);
15721 else
15722 output_asm_insn (xasm, &call_op);
15723 return "";
15726 /* SEH unwinding can require an extra nop to be emitted in several
15727 circumstances. Determine if we have one of those. */
15728 if (TARGET_SEH)
15730 rtx_insn *i;
15732 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
15734 /* Prevent a catch region from being adjacent to a jump that would
15735 be interpreted as an epilogue sequence by the unwinder. */
15736 if (JUMP_P(i) && CROSSING_JUMP_P (i))
15738 seh_nop_p = true;
15739 break;
15742 /* If we get to another real insn, we don't need the nop. */
15743 if (INSN_P (i))
15744 break;
15746 /* If we get to the epilogue note, prevent a catch region from
15747 being adjacent to the standard epilogue sequence. If non-
15748 call-exceptions, we'll have done this during epilogue emission. */
15749 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
15750 && !flag_non_call_exceptions
15751 && !can_throw_internal (insn))
15753 seh_nop_p = true;
15754 break;
15758 /* If we didn't find a real insn following the call, prevent the
15759 unwinder from looking into the next function. */
15760 if (i == NULL)
15761 seh_nop_p = true;
15764 if (direct_p)
15766 if (ix86_nopic_noplt_attribute_p (call_op))
15768 direct_p = false;
15769 if (TARGET_64BIT)
15771 if (output_indirect_p)
15772 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15773 else
15774 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15776 else
15778 if (output_indirect_p)
15779 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
15780 else
15781 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
15784 else
15785 xasm = "%!call\t%P0";
15787 else
15789 if (output_indirect_p)
15790 xasm = "%0";
15791 else
15792 xasm = "%!call\t%A0";
15795 if (output_indirect_p && !direct_p)
15796 ix86_output_indirect_branch (call_op, xasm, false);
15797 else
15798 output_asm_insn (xasm, &call_op);
15800 if (seh_nop_p)
15801 return "nop";
15803 return "";
15806 /* Return a MEM corresponding to a stack slot with mode MODE.
15807 Allocate a new slot if necessary.
15809 The RTL for a function can have several slots available: N is
15810 which slot to use. */
15813 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
15815 struct stack_local_entry *s;
15817 gcc_assert (n < MAX_386_STACK_LOCALS);
15819 for (s = ix86_stack_locals; s; s = s->next)
15820 if (s->mode == mode && s->n == n)
15821 return validize_mem (copy_rtx (s->rtl));
15823 s = ggc_alloc<stack_local_entry> ();
15824 s->n = n;
15825 s->mode = mode;
15826 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15828 s->next = ix86_stack_locals;
15829 ix86_stack_locals = s;
15830 return validize_mem (copy_rtx (s->rtl));
15833 static void
15834 ix86_instantiate_decls (void)
15836 struct stack_local_entry *s;
15838 for (s = ix86_stack_locals; s; s = s->next)
15839 if (s->rtl != NULL_RTX)
15840 instantiate_decl_rtl (s->rtl);
15843 /* Check whether x86 address PARTS is a pc-relative address. */
15845 bool
15846 ix86_rip_relative_addr_p (struct ix86_address *parts)
15848 rtx base, index, disp;
15850 base = parts->base;
15851 index = parts->index;
15852 disp = parts->disp;
15854 if (disp && !base && !index)
15856 if (TARGET_64BIT)
15858 rtx symbol = disp;
15860 if (GET_CODE (disp) == CONST)
15861 symbol = XEXP (disp, 0);
15862 if (GET_CODE (symbol) == PLUS
15863 && CONST_INT_P (XEXP (symbol, 1)))
15864 symbol = XEXP (symbol, 0);
15866 if (GET_CODE (symbol) == LABEL_REF
15867 || (GET_CODE (symbol) == SYMBOL_REF
15868 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
15869 || (GET_CODE (symbol) == UNSPEC
15870 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
15871 || XINT (symbol, 1) == UNSPEC_PCREL
15872 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
15873 return true;
15876 return false;
15879 /* Calculate the length of the memory address in the instruction encoding.
15880 Includes the addr32 prefix; does not include the one-byte modrm, opcode,
15881 or other prefixes. We never generate the addr32 prefix for an LEA insn. */
15884 memory_address_length (rtx addr, bool lea)
15886 struct ix86_address parts;
15887 rtx base, index, disp;
15888 int len;
15889 int ok;
15891 if (GET_CODE (addr) == PRE_DEC
15892 || GET_CODE (addr) == POST_INC
15893 || GET_CODE (addr) == PRE_MODIFY
15894 || GET_CODE (addr) == POST_MODIFY)
15895 return 0;
15897 ok = ix86_decompose_address (addr, &parts);
15898 gcc_assert (ok);
15900 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
15902 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
15903 if (TARGET_64BIT && !lea
15904 && (SImode_address_operand (addr, VOIDmode)
15905 || (parts.base && GET_MODE (parts.base) == SImode)
15906 || (parts.index && GET_MODE (parts.index) == SImode)))
15907 len++;
15909 base = parts.base;
15910 index = parts.index;
15911 disp = parts.disp;
15913 if (base && SUBREG_P (base))
15914 base = SUBREG_REG (base);
15915 if (index && SUBREG_P (index))
15916 index = SUBREG_REG (index);
15918 gcc_assert (base == NULL_RTX || REG_P (base));
15919 gcc_assert (index == NULL_RTX || REG_P (index));
15921 /* Rule of thumb:
15922 - esp as the base always wants an index,
15923 - ebp as the base always wants a displacement,
15924 - r12 as the base always wants an index,
15925 - r13 as the base always wants a displacement. */
15927 /* Register Indirect. */
15928 if (base && !index && !disp)
15930 /* esp (for its index) and ebp (for its displacement) need
15931 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
15932 code. */
15933 if (base == arg_pointer_rtx
15934 || base == frame_pointer_rtx
15935 || REGNO (base) == SP_REG
15936 || REGNO (base) == BP_REG
15937 || REGNO (base) == R12_REG
15938 || REGNO (base) == R13_REG)
15939 len++;
15942 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
15943 is not disp32, but disp32(%rip), so for plain disp32
15944 a SIB byte is needed, unless print_operand_address
15945 optimizes it into disp32(%rip) or (%rip) is implied
15946 by UNSPEC. */
15947 else if (disp && !base && !index)
15949 len += 4;
15950 if (!ix86_rip_relative_addr_p (&parts))
15951 len++;
15953 else
15955 /* Find the length of the displacement constant. */
15956 if (disp)
15958 if (base && satisfies_constraint_K (disp))
15959 len += 1;
15960 else
15961 len += 4;
15963 /* ebp always wants a displacement. Similarly r13. */
15964 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
15965 len++;
15967 /* An index requires the two-byte modrm form.... */
15968 if (index
15969 /* ...like esp (or r12), which always wants an index. */
15970 || base == arg_pointer_rtx
15971 || base == frame_pointer_rtx
15972 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
15973 len++;
15976 return len;
15979 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
15980 is set, expect that the insn has an 8-bit immediate alternative. */
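/* For example (a rough illustration): with SHORTFORM set, an immediate of
   -100 fits the sign-extended 8-bit alternative and yields length 1, while
   an immediate of 1000 in an SImode insn needs the full 32-bit encoding
   and yields length 4.  */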
15982 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
15984 int len = 0;
15985 int i;
15986 extract_insn_cached (insn);
15987 for (i = recog_data.n_operands - 1; i >= 0; --i)
15988 if (CONSTANT_P (recog_data.operand[i]))
15990 enum attr_mode mode = get_attr_mode (insn);
15992 gcc_assert (!len);
15993 if (shortform && CONST_INT_P (recog_data.operand[i]))
15995 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
15996 switch (mode)
15998 case MODE_QI:
15999 len = 1;
16000 continue;
16001 case MODE_HI:
16002 ival = trunc_int_for_mode (ival, HImode);
16003 break;
16004 case MODE_SI:
16005 ival = trunc_int_for_mode (ival, SImode);
16006 break;
16007 default:
16008 break;
16010 if (IN_RANGE (ival, -128, 127))
16012 len = 1;
16013 continue;
16016 switch (mode)
16018 case MODE_QI:
16019 len = 1;
16020 break;
16021 case MODE_HI:
16022 len = 2;
16023 break;
16024 case MODE_SI:
16025 len = 4;
16026 break;
16027 /* Immediates for DImode instructions are encoded
16028 as 32-bit sign-extended values. */
16029 case MODE_DI:
16030 len = 4;
16031 break;
16032 default:
16033 fatal_insn ("unknown insn mode", insn);
16036 return len;
16039 /* Compute default value for "length_address" attribute. */
16041 ix86_attr_length_address_default (rtx_insn *insn)
16043 int i;
16045 if (get_attr_type (insn) == TYPE_LEA)
16047 rtx set = PATTERN (insn), addr;
16049 if (GET_CODE (set) == PARALLEL)
16050 set = XVECEXP (set, 0, 0);
16052 gcc_assert (GET_CODE (set) == SET);
16054 addr = SET_SRC (set);
16056 return memory_address_length (addr, true);
16059 extract_insn_cached (insn);
16060 for (i = recog_data.n_operands - 1; i >= 0; --i)
16062 rtx op = recog_data.operand[i];
16063 if (MEM_P (op))
16065 constrain_operands_cached (insn, reload_completed);
16066 if (which_alternative != -1)
16068 const char *constraints = recog_data.constraints[i];
16069 int alt = which_alternative;
16071 while (*constraints == '=' || *constraints == '+')
16072 constraints++;
16073 while (alt-- > 0)
16074 while (*constraints++ != ',')
16076 /* Skip ignored operands. */
16077 if (*constraints == 'X')
16078 continue;
16081 int len = memory_address_length (XEXP (op, 0), false);
16083 /* Account for segment prefix for non-default addr spaces. */
16084 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
16085 len++;
16087 return len;
16090 return 0;
16093 /* Compute default value for "length_vex" attribute. It includes
16094 2 or 3 byte VEX prefix and 1 opcode byte. */
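/* Informally: the 2-byte (C5) VEX prefix can only encode the 0f opcode map
   and cannot carry the W, X or B extension bits, so e.g. a DImode general
   register operand or a memory operand using %r8-%r15 forces the 3-byte
   (C4) form and a result of 4; otherwise the result is 3.  */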
16097 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
16098 bool has_vex_w)
16100 int i;
16102 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit requires
16103 the 3-byte VEX prefix. */
16104 if (!has_0f_opcode || has_vex_w)
16105 return 3 + 1;
16107 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
16108 if (!TARGET_64BIT)
16109 return 2 + 1;
16111 extract_insn_cached (insn);
16113 for (i = recog_data.n_operands - 1; i >= 0; --i)
16114 if (REG_P (recog_data.operand[i]))
16116 /* REX.W bit uses 3 byte VEX prefix. */
16117 if (GET_MODE (recog_data.operand[i]) == DImode
16118 && GENERAL_REG_P (recog_data.operand[i]))
16119 return 3 + 1;
16121 else
16123 /* REX.X or REX.B bits use 3 byte VEX prefix. */
16124 if (MEM_P (recog_data.operand[i])
16125 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
16126 return 3 + 1;
16129 return 2 + 1;
16133 static bool
16134 ix86_class_likely_spilled_p (reg_class_t);
16136 /* Return true if the lhs of INSN is a HW function argument register; set
16137 IS_SPILLED to true if it is a likely spilled HW register. */
16138 static bool
16139 insn_is_function_arg (rtx insn, bool* is_spilled)
16141 rtx dst;
16143 if (!NONDEBUG_INSN_P (insn))
16144 return false;
16145 /* Call instructions are not movable; ignore them. */
16146 if (CALL_P (insn))
16147 return false;
16148 insn = PATTERN (insn);
16149 if (GET_CODE (insn) == PARALLEL)
16150 insn = XVECEXP (insn, 0, 0);
16151 if (GET_CODE (insn) != SET)
16152 return false;
16153 dst = SET_DEST (insn);
16154 if (REG_P (dst) && HARD_REGISTER_P (dst)
16155 && ix86_function_arg_regno_p (REGNO (dst)))
16157 /* Is it a likely spilled HW register? */
16158 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
16159 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
16160 *is_spilled = true;
16161 return true;
16163 return false;
16166 /* Add output dependencies for a chain of adjacent function arguments, but
16167 only if there is a move to a likely spilled HW register. Return the first
16168 argument if at least one dependence was added, or NULL otherwise. */
16169 static rtx_insn *
16170 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
16172 rtx_insn *insn;
16173 rtx_insn *last = call;
16174 rtx_insn *first_arg = NULL;
16175 bool is_spilled = false;
16177 head = PREV_INSN (head);
16179 /* Find the argument-passing instruction nearest to the call. */
16180 while (true)
16182 last = PREV_INSN (last);
16183 if (last == head)
16184 return NULL;
16185 if (!NONDEBUG_INSN_P (last))
16186 continue;
16187 if (insn_is_function_arg (last, &is_spilled))
16188 break;
16189 return NULL;
16192 first_arg = last;
16193 while (true)
16195 insn = PREV_INSN (last);
16196 if (!INSN_P (insn))
16197 break;
16198 if (insn == head)
16199 break;
16200 if (!NONDEBUG_INSN_P (insn))
16202 last = insn;
16203 continue;
16205 if (insn_is_function_arg (insn, &is_spilled))
16207 /* Add an output dependence between two function arguments if the chain
16208 of output arguments contains likely spilled HW registers. */
16209 if (is_spilled)
16210 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
16211 first_arg = last = insn;
16213 else
16214 break;
16216 if (!is_spilled)
16217 return NULL;
16218 return first_arg;
16221 /* Add an output or anti dependency from INSN to FIRST_ARG to restrict its
16222 code motion. */
16223 static void
16224 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
16226 rtx set;
16227 rtx tmp;
16229 set = single_set (insn);
16230 if (!set)
16231 return;
16232 tmp = SET_DEST (set);
16233 if (REG_P (tmp))
16235 /* Add output dependency to the first function argument. */
16236 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
16237 return;
16239 /* Add anti dependency. */
16240 add_dependence (first_arg, insn, REG_DEP_ANTI);
16243 /* Avoid cross-block motion of a function argument by adding a dependency
16244 from the first non-jump instruction in BB. */
16245 static void
16246 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
16248 rtx_insn *insn = BB_END (bb);
16250 while (insn)
16252 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
16254 rtx set = single_set (insn);
16255 if (set)
16257 avoid_func_arg_motion (arg, insn);
16258 return;
16261 if (insn == BB_HEAD (bb))
16262 return;
16263 insn = PREV_INSN (insn);
16267 /* Hook for pre-reload schedule - avoid motion of function arguments
16268 passed in likely spilled HW registers. */
16269 static void
16270 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
16272 rtx_insn *insn;
16273 rtx_insn *first_arg = NULL;
16274 if (reload_completed)
16275 return;
16276 while (head != tail && DEBUG_INSN_P (head))
16277 head = NEXT_INSN (head);
16278 for (insn = tail; insn != head; insn = PREV_INSN (insn))
16279 if (INSN_P (insn) && CALL_P (insn))
16281 first_arg = add_parameter_dependencies (insn, head);
16282 if (first_arg)
16284 /* Add a dependee for the first argument to predecessors, but only if the
16285 region contains more than one block. */
16286 basic_block bb = BLOCK_FOR_INSN (insn);
16287 int rgn = CONTAINING_RGN (bb->index);
16288 int nr_blks = RGN_NR_BLOCKS (rgn);
16289 /* Skip trivial regions and region head blocks that can have
16290 predecessors outside of region. */
16291 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
16293 edge e;
16294 edge_iterator ei;
16296 /* Regions are SCCs with the exception of selective
16297 scheduling with pipelining of outer blocks enabled.
16298 So also check that immediate predecessors of a non-head
16299 block are in the same region. */
16300 FOR_EACH_EDGE (e, ei, bb->preds)
16302 /* Avoid creating loop-carried dependencies by using the topological
16303 ordering in the region. */
16304 if (rgn == CONTAINING_RGN (e->src->index)
16305 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
16306 add_dependee_for_func_arg (first_arg, e->src);
16309 insn = first_arg;
16310 if (insn == head)
16311 break;
16314 else if (first_arg)
16315 avoid_func_arg_motion (first_arg, insn);
16318 /* Hook for pre-reload schedule - set priority of moves from likely spilled
16319 HW registers to the maximum, to schedule them as soon as possible. These are
16320 moves from function argument registers at the top of the function entry
16321 and moves from function return value registers after a call. */
16322 static int
16323 ix86_adjust_priority (rtx_insn *insn, int priority)
16325 rtx set;
16327 if (reload_completed)
16328 return priority;
16330 if (!NONDEBUG_INSN_P (insn))
16331 return priority;
16333 set = single_set (insn);
16334 if (set)
16336 rtx tmp = SET_SRC (set);
16337 if (REG_P (tmp)
16338 && HARD_REGISTER_P (tmp)
16339 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
16340 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
16341 return current_sched_info->sched_max_insns_priority;
16344 return priority;
16347 /* Prepare for scheduling pass. */
16348 static void
16349 ix86_sched_init_global (FILE *, int, int)
16351 /* Install scheduling hooks for current CPU. Some of these hooks are used
16352 in time-critical parts of the scheduler, so we only set them up when
16353 they are actually used. */
16354 switch (ix86_tune)
16356 case PROCESSOR_CORE2:
16357 case PROCESSOR_NEHALEM:
16358 case PROCESSOR_SANDYBRIDGE:
16359 case PROCESSOR_HASWELL:
16360 case PROCESSOR_GENERIC:
16361 /* Do not perform multipass scheduling for pre-reload schedule
16362 to save compile time. */
16363 if (reload_completed)
16365 ix86_core2i7_init_hooks ();
16366 break;
16368 /* Fall through. */
16369 default:
16370 targetm.sched.dfa_post_advance_cycle = NULL;
16371 targetm.sched.first_cycle_multipass_init = NULL;
16372 targetm.sched.first_cycle_multipass_begin = NULL;
16373 targetm.sched.first_cycle_multipass_issue = NULL;
16374 targetm.sched.first_cycle_multipass_backtrack = NULL;
16375 targetm.sched.first_cycle_multipass_end = NULL;
16376 targetm.sched.first_cycle_multipass_fini = NULL;
16377 break;
16382 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
16384 static HOST_WIDE_INT
16385 ix86_static_rtx_alignment (machine_mode mode)
16387 if (mode == DFmode)
16388 return 64;
16389 if (ALIGN_MODE_128 (mode))
16390 return MAX (128, GET_MODE_ALIGNMENT (mode));
16391 return GET_MODE_ALIGNMENT (mode);
16394 /* Implement TARGET_CONSTANT_ALIGNMENT. */
16396 static HOST_WIDE_INT
16397 ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
16399 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
16400 || TREE_CODE (exp) == INTEGER_CST)
16402 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
16403 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
16404 return MAX (mode_align, align);
16406 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16407 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16408 return BITS_PER_WORD;
16410 return align;
16413 /* Implement TARGET_EMPTY_RECORD_P. */
16415 static bool
16416 ix86_is_empty_record (const_tree type)
16418 if (!TARGET_64BIT)
16419 return false;
16420 return default_is_empty_record (type);
16423 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
16425 static void
16426 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
16428 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
16430 if (!cum->warn_empty)
16431 return;
16433 if (!TYPE_EMPTY_P (type))
16434 return;
16436 /* Don't warn if the function isn't visible outside of the TU. */
16437 if (cum->decl && !TREE_PUBLIC (cum->decl))
16438 return;
16440 const_tree ctx = get_ultimate_context (cum->decl);
16441 if (ctx != NULL_TREE
16442 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
16443 return;
16445 /* If the actual size of the type is zero, then there is no change
16446 in how objects of this size are passed. */
16447 if (int_size_in_bytes (type) == 0)
16448 return;
16450 warning (OPT_Wabi, "empty class %qT parameter passing ABI "
16451 "changes in %<-fabi-version=12%> (GCC 8)", type);
16453 /* Only warn once. */
16454 cum->warn_empty = false;
16457 /* This hook returns the name of the multilib ABI. */
16459 static const char *
16460 ix86_get_multilib_abi_name (void)
16462 if (!(TARGET_64BIT_P (ix86_isa_flags)))
16463 return "i386";
16464 else if (TARGET_X32_P (ix86_isa_flags))
16465 return "x32";
16466 else
16467 return "x86_64";
16470 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
16471 the data type, and ALIGN is the alignment that the object would
16472 ordinarily have. */
16474 static int
16475 iamcu_alignment (tree type, int align)
16477 machine_mode mode;
16479 if (align < 32 || TYPE_USER_ALIGN (type))
16480 return align;
16482 /* The Intel MCU psABI specifies that scalar types > 4 bytes are aligned to
16483 4 bytes. */
16484 mode = TYPE_MODE (strip_array_types (type));
16485 switch (GET_MODE_CLASS (mode))
16487 case MODE_INT:
16488 case MODE_COMPLEX_INT:
16489 case MODE_COMPLEX_FLOAT:
16490 case MODE_FLOAT:
16491 case MODE_DECIMAL_FLOAT:
16492 return 32;
16493 default:
16494 return align;
16498 /* Compute the alignment for a static variable.
16499 TYPE is the data type, and ALIGN is the alignment that
16500 the object would ordinarily have. The value of this function is used
16501 instead of that alignment to align the object. */
16504 ix86_data_alignment (tree type, unsigned int align, bool opt)
16506 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
16507 for symbols from other compilation units or symbols that don't need
16508 to bind locally. In order to preserve some ABI compatibility with
16509 those compilers, ensure we don't decrease alignment from what we
16510 used to assume. */
16512 unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
16514 /* A data structure equal to or greater than the size of a cache line
16515 (64 bytes in the Pentium 4 and other recent Intel processors, including
16516 processors based on Intel Core microarchitecture) should be aligned
16517 so that its base address is a multiple of a cache line size. */
16519 unsigned int max_align
16520 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
16522 if (max_align < BITS_PER_WORD)
16523 max_align = BITS_PER_WORD;
16525 switch (ix86_align_data_type)
16527 case ix86_align_data_type_abi: opt = false; break;
16528 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
16529 case ix86_align_data_type_cacheline: break;
16532 if (TARGET_IAMCU)
16533 align = iamcu_alignment (type, align);
16535 if (opt
16536 && AGGREGATE_TYPE_P (type)
16537 && TYPE_SIZE (type)
16538 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
16540 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
16541 && align < max_align_compat)
16542 align = max_align_compat;
16543 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
16544 && align < max_align)
16545 align = max_align;
16548 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
16549 to a 16-byte boundary. */
16550 if (TARGET_64BIT)
16552 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
16553 && TYPE_SIZE (type)
16554 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16555 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
16556 && align < 128)
16557 return 128;
16560 if (!opt)
16561 return align;
16563 if (TREE_CODE (type) == ARRAY_TYPE)
16565 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16566 return 64;
16567 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16568 return 128;
16570 else if (TREE_CODE (type) == COMPLEX_TYPE)
16573 if (TYPE_MODE (type) == DCmode && align < 64)
16574 return 64;
16575 if ((TYPE_MODE (type) == XCmode
16576 || TYPE_MODE (type) == TCmode) && align < 128)
16577 return 128;
16579 else if ((TREE_CODE (type) == RECORD_TYPE
16580 || TREE_CODE (type) == UNION_TYPE
16581 || TREE_CODE (type) == QUAL_UNION_TYPE)
16582 && TYPE_FIELDS (type))
16584 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16585 return 64;
16586 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16587 return 128;
16589 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16590 || TREE_CODE (type) == INTEGER_TYPE)
16592 if (TYPE_MODE (type) == DFmode && align < 64)
16593 return 64;
16594 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16595 return 128;
16598 return align;
16601 /* Implement TARGET_LOWER_LOCAL_DECL_ALIGNMENT. */
16602 static void
16603 ix86_lower_local_decl_alignment (tree decl)
16605 unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
16606 DECL_ALIGN (decl), true);
16607 if (new_align < DECL_ALIGN (decl))
16608 SET_DECL_ALIGN (decl, new_align);
16611 /* Compute the alignment for a local variable or a stack slot. EXP is
16612 the data type or decl itself, MODE is the widest mode available and
16613 ALIGN is the alignment that the object would ordinarily have. The
16614 value of this macro is used instead of that alignment to align the
16615 object. */
16617 unsigned int
16618 ix86_local_alignment (tree exp, machine_mode mode,
16619 unsigned int align, bool may_lower)
16621 tree type, decl;
16623 if (exp && DECL_P (exp))
16625 type = TREE_TYPE (exp);
16626 decl = exp;
16628 else
16630 type = exp;
16631 decl = NULL;
16634 /* Don't do dynamic stack realignment for long long objects with
16635 -mpreferred-stack-boundary=2. */
16636 if (may_lower
16637 && !TARGET_64BIT
16638 && align == 64
16639 && ix86_preferred_stack_boundary < 64
16640 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
16641 && (!type || !TYPE_USER_ALIGN (type))
16642 && (!decl || !DECL_USER_ALIGN (decl)))
16643 align = 32;
16645 /* If TYPE is NULL, we are allocating a stack slot for caller-save
16646 register in MODE. We will return the largest alignment of XF
16647 and DF. */
16648 if (!type)
16650 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
16651 align = GET_MODE_ALIGNMENT (DFmode);
16652 return align;
16655 /* Don't increase alignment for Intel MCU psABI. */
16656 if (TARGET_IAMCU)
16657 return align;
16659 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16660 to a 16-byte boundary. The exact wording is:
16662 An array uses the same alignment as its elements, except that a local or
16663 global array variable of length at least 16 bytes or
16664 a C99 variable-length array variable always has alignment of at least 16 bytes.
16666 This was added to allow use of aligned SSE instructions on arrays. The
16667 rule is meant for static storage (where the compiler cannot do the
16668 analysis by itself). We follow it for automatic variables only when
16669 convenient. We fully control everything in the function being compiled,
16670 and functions from other units cannot rely on the alignment.
16672 Exclude the va_list type. It is the common case of a local array where
16673 we cannot benefit from the alignment.
16675 TODO: Probably one should optimize for size only when var is not escaping. */
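/* For instance (informal): when compiling for speed with SSE on x86-64,
   a local "char buf[32]" is at least 16 bytes and is given 128-bit
   alignment by the check below, while "char buf[8]" keeps its natural
   alignment.  */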
16676 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
16677 && TARGET_SSE)
16679 if (AGGREGATE_TYPE_P (type)
16680 && (va_list_type_node == NULL_TREE
16681 || (TYPE_MAIN_VARIANT (type)
16682 != TYPE_MAIN_VARIANT (va_list_type_node)))
16683 && TYPE_SIZE (type)
16684 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16685 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
16686 && align < 128)
16687 return 128;
16689 if (TREE_CODE (type) == ARRAY_TYPE)
16691 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16692 return 64;
16693 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16694 return 128;
16696 else if (TREE_CODE (type) == COMPLEX_TYPE)
16698 if (TYPE_MODE (type) == DCmode && align < 64)
16699 return 64;
16700 if ((TYPE_MODE (type) == XCmode
16701 || TYPE_MODE (type) == TCmode) && align < 128)
16702 return 128;
16704 else if ((TREE_CODE (type) == RECORD_TYPE
16705 || TREE_CODE (type) == UNION_TYPE
16706 || TREE_CODE (type) == QUAL_UNION_TYPE)
16707 && TYPE_FIELDS (type))
16709 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16710 return 64;
16711 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16712 return 128;
16714 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16715 || TREE_CODE (type) == INTEGER_TYPE)
16718 if (TYPE_MODE (type) == DFmode && align < 64)
16719 return 64;
16720 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16721 return 128;
16723 return align;
16726 /* Compute the minimum required alignment for dynamic stack realignment
16727 purposes for a local variable, parameter or a stack slot. EXP is
16728 the data type or decl itself, MODE is its mode and ALIGN is the
16729 alignment that the object would ordinarily have. */
16731 unsigned int
16732 ix86_minimum_alignment (tree exp, machine_mode mode,
16733 unsigned int align)
16735 tree type, decl;
16737 if (exp && DECL_P (exp))
16739 type = TREE_TYPE (exp);
16740 decl = exp;
16742 else
16744 type = exp;
16745 decl = NULL;
16748 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
16749 return align;
16751 /* Don't do dynamic stack realignment for long long objects with
16752 -mpreferred-stack-boundary=2. */
16753 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
16754 && (!type || !TYPE_USER_ALIGN (type))
16755 && (!decl || !DECL_USER_ALIGN (decl)))
16757 gcc_checking_assert (!TARGET_STV);
16758 return 32;
16761 return align;
16764 /* Find a location for the static chain incoming to a nested function.
16765 This is a register, unless all free registers are used by arguments. */
16767 static rtx
16768 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
16770 unsigned regno;
16772 if (TARGET_64BIT)
16774 /* We always use R10 in 64-bit mode. */
16775 regno = R10_REG;
16777 else
16779 const_tree fntype, fndecl;
16780 unsigned int ccvt;
16782 /* By default in 32-bit mode we use ECX to pass the static chain. */
16783 regno = CX_REG;
16785 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
16787 fntype = TREE_TYPE (fndecl_or_type);
16788 fndecl = fndecl_or_type;
16790 else
16792 fntype = fndecl_or_type;
16793 fndecl = NULL;
16796 ccvt = ix86_get_callcvt (fntype);
16797 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
16799 /* Fastcall functions use ecx/edx for arguments, which leaves
16800 us with EAX for the static chain.
16801 Thiscall functions use ecx for arguments, which also
16802 leaves us with EAX for the static chain. */
16803 regno = AX_REG;
16805 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
16807 /* Thiscall functions use ecx for arguments, which leaves
16808 us with EAX and EDX for the static chain.
16809 For ABI compatibility we use EAX. */
16810 regno = AX_REG;
16812 else if (ix86_function_regparm (fntype, fndecl) == 3)
16814 /* For regparm 3, we have no free call-clobbered registers in
16815 which to store the static chain. In order to implement this,
16816 we have the trampoline push the static chain to the stack.
16817 However, we can't push a value below the return address when
16818 we call the nested function directly, so we have to use an
16819 alternate entry point. For this we use ESI, and have the
16820 alternate entry point push ESI, so that things appear the
16821 same once we're executing the nested function. */
16822 if (incoming_p)
16824 if (fndecl == current_function_decl
16825 && !ix86_static_chain_on_stack)
16827 gcc_assert (!reload_completed);
16828 ix86_static_chain_on_stack = true;
16830 return gen_frame_mem (SImode,
16831 plus_constant (Pmode,
16832 arg_pointer_rtx, -8));
16834 regno = SI_REG;
16838 return gen_rtx_REG (Pmode, regno);
16841 /* Emit RTL insns to initialize the variable parts of a trampoline.
16842 FNDECL is the decl of the target address; M_TRAMP is a MEM for
16843 the trampoline, and CHAIN_VALUE is an RTX for the static chain
16844 to be passed to the target function. */
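/* A rough sketch of the 64-bit trampoline laid out below, assuming no
   ENDBR64 and a full 64-bit FNADDR (decimal byte offsets from the start
   of M_TRAMP):
     0:  49 bb <8-byte fnaddr>   movabs $fnaddr, %r11
     10: 49 ba <8-byte chain>    movabs $chain, %r10
     20: 49 ff e3 90             jmp *%r11; nop (pads the 32-bit store)  */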
16846 static void
16847 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
16849 rtx mem, fnaddr;
16850 int opcode;
16851 int offset = 0;
16852 bool need_endbr = (flag_cf_protection & CF_BRANCH);
16854 fnaddr = XEXP (DECL_RTL (fndecl), 0);
16856 if (TARGET_64BIT)
16858 int size;
16860 if (need_endbr)
16862 /* Insert ENDBR64. */
16863 mem = adjust_address (m_tramp, SImode, offset);
16864 emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
16865 offset += 4;
16868 /* Load the function address into r11. Try to load the address using
16869 the shorter movl instead of movabs. We may want to support
16870 movq for kernel mode, but the kernel does not use trampolines at
16871 the moment. FNADDR is a 32-bit address and may not be in
16872 DImode when ptr_mode == SImode. Always use movl in this
16873 case. */
16874 if (ptr_mode == SImode
16875 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16877 fnaddr = copy_addr_to_reg (fnaddr);
16879 mem = adjust_address (m_tramp, HImode, offset);
16880 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
16882 mem = adjust_address (m_tramp, SImode, offset + 2);
16883 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
16884 offset += 6;
16886 else
16888 mem = adjust_address (m_tramp, HImode, offset);
16889 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
16891 mem = adjust_address (m_tramp, DImode, offset + 2);
16892 emit_move_insn (mem, fnaddr);
16893 offset += 10;
16896 /* Load static chain using movabs to r10. Use the shorter movl
16897 instead of movabs when ptr_mode == SImode. */
16898 if (ptr_mode == SImode)
16900 opcode = 0xba41;
16901 size = 6;
16903 else
16905 opcode = 0xba49;
16906 size = 10;
16909 mem = adjust_address (m_tramp, HImode, offset);
16910 emit_move_insn (mem, gen_int_mode (opcode, HImode));
16912 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
16913 emit_move_insn (mem, chain_value);
16914 offset += size;
16916 /* Jump to r11; the last (unused) byte is a nop, only there to
16917 pad the write out to a single 32-bit store. */
16918 mem = adjust_address (m_tramp, SImode, offset);
16919 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
16920 offset += 4;
16922 else
16924 rtx disp, chain;
16926 /* Depending on the static chain location, either load a register
16927 with a constant, or push the constant to the stack. All of the
16928 instructions are the same size. */
16929 chain = ix86_static_chain (fndecl, true);
16930 if (REG_P (chain))
16932 switch (REGNO (chain))
16934 case AX_REG:
16935 opcode = 0xb8; break;
16936 case CX_REG:
16937 opcode = 0xb9; break;
16938 default:
16939 gcc_unreachable ();
16942 else
16943 opcode = 0x68;
16945 if (need_endbr)
16947 /* Insert ENDBR32. */
16948 mem = adjust_address (m_tramp, SImode, offset);
16949 emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
16950 offset += 4;
16953 mem = adjust_address (m_tramp, QImode, offset);
16954 emit_move_insn (mem, gen_int_mode (opcode, QImode));
16956 mem = adjust_address (m_tramp, SImode, offset + 1);
16957 emit_move_insn (mem, chain_value);
16958 offset += 5;
16960 mem = adjust_address (m_tramp, QImode, offset);
16961 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
16963 mem = adjust_address (m_tramp, SImode, offset + 1);
16965 /* Compute offset from the end of the jmp to the target function.
16966 In the case in which the trampoline stores the static chain on
16967 the stack, we need to skip the first insn which pushes the
16968 (call-saved) register static chain; this push is 1 byte. */
16969 offset += 5;
16970 int skip = MEM_P (chain) ? 1 : 0;
16971 /* Skip ENDBR32 at the entry of the target function. */
16972 if (need_endbr
16973 && !cgraph_node::get (fndecl)->only_called_directly_p ())
16974 skip += 4;
16975 disp = expand_binop (SImode, sub_optab, fnaddr,
16976 plus_constant (Pmode, XEXP (m_tramp, 0),
16977 offset - skip),
16978 NULL_RTX, 1, OPTAB_DIRECT);
16979 emit_move_insn (mem, disp);
16982 gcc_assert (offset <= TRAMPOLINE_SIZE);
16984 #ifdef HAVE_ENABLE_EXECUTE_STACK
16985 #ifdef CHECK_EXECUTE_STACK_ENABLED
16986 if (CHECK_EXECUTE_STACK_ENABLED)
16987 #endif
16988 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16989 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
16990 #endif
16993 static bool
16994 ix86_allocate_stack_slots_for_args (void)
16996 /* Naked functions should not allocate stack slots for arguments. */
16997 return !ix86_function_naked (current_function_decl);
17000 static bool
17001 ix86_warn_func_return (tree decl)
17003 /* Naked functions are implemented entirely in assembly, including the
17004 return sequence, so suppress warnings about this. */
17005 return !ix86_function_naked (decl);
17008 /* Return the shift count of a vector-by-scalar shift builtin's second
17009 argument ARG1. */
17010 static tree
17011 ix86_vector_shift_count (tree arg1)
17013 if (tree_fits_uhwi_p (arg1))
17014 return arg1;
17015 else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
17017 /* The count argument is odd: it is passed in as various 128-bit
17018 (or 64-bit) vector types, and its low 64 bits are the count. */
17019 unsigned char buf[16];
17020 int len = native_encode_expr (arg1, buf, 16);
17021 if (len == 0)
17022 return NULL_TREE;
17023 tree t = native_interpret_expr (uint64_type_node, buf, len);
17024 if (t && tree_fits_uhwi_p (t))
17025 return t;
17027 return NULL_TREE;
17030 static tree
17031 ix86_fold_builtin (tree fndecl, int n_args,
17032 tree *args, bool ignore ATTRIBUTE_UNUSED)
17034 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
17036 enum ix86_builtins fn_code
17037 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
17038 enum rtx_code rcode;
17039 bool is_vshift;
17040 unsigned HOST_WIDE_INT mask;
17042 switch (fn_code)
17044 case IX86_BUILTIN_CPU_IS:
17045 case IX86_BUILTIN_CPU_SUPPORTS:
17046 gcc_assert (n_args == 1);
17047 return fold_builtin_cpu (fndecl, args);
17049 case IX86_BUILTIN_NANQ:
17050 case IX86_BUILTIN_NANSQ:
17052 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17053 const char *str = c_getstr (*args);
17054 int quiet = fn_code == IX86_BUILTIN_NANQ;
17055 REAL_VALUE_TYPE real;
17057 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
17058 return build_real (type, real);
17059 return NULL_TREE;
17062 case IX86_BUILTIN_INFQ:
17063 case IX86_BUILTIN_HUGE_VALQ:
17065 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17066 REAL_VALUE_TYPE inf;
17067 real_inf (&inf);
17068 return build_real (type, inf);
17071 case IX86_BUILTIN_TZCNT16:
17072 case IX86_BUILTIN_CTZS:
17073 case IX86_BUILTIN_TZCNT32:
17074 case IX86_BUILTIN_TZCNT64:
17075 gcc_assert (n_args == 1);
17076 if (TREE_CODE (args[0]) == INTEGER_CST)
17078 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17079 tree arg = args[0];
17080 if (fn_code == IX86_BUILTIN_TZCNT16
17081 || fn_code == IX86_BUILTIN_CTZS)
17082 arg = fold_convert (short_unsigned_type_node, arg);
17083 if (integer_zerop (arg))
17084 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
17085 else
17086 return fold_const_call (CFN_CTZ, type, arg);
17088 break;
17090 case IX86_BUILTIN_LZCNT16:
17091 case IX86_BUILTIN_CLZS:
17092 case IX86_BUILTIN_LZCNT32:
17093 case IX86_BUILTIN_LZCNT64:
17094 gcc_assert (n_args == 1);
17095 if (TREE_CODE (args[0]) == INTEGER_CST)
17097 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17098 tree arg = args[0];
17099 if (fn_code == IX86_BUILTIN_LZCNT16
17100 || fn_code == IX86_BUILTIN_CLZS)
17101 arg = fold_convert (short_unsigned_type_node, arg);
17102 if (integer_zerop (arg))
17103 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
17104 else
17105 return fold_const_call (CFN_CLZ, type, arg);
17107 break;
17109 case IX86_BUILTIN_BEXTR32:
17110 case IX86_BUILTIN_BEXTR64:
17111 case IX86_BUILTIN_BEXTRI32:
17112 case IX86_BUILTIN_BEXTRI64:
17113 gcc_assert (n_args == 2);
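/* Informally: the second operand is the BEXTR control word, with the start
   position in bits 0-7 and the field length in bits 8-15; the fold below
   computes (args[0] >> start) with only the low LEN bits kept.  E.g. a
   control of 0x0302 applied to 0b11100 extracts bits 2-4, i.e. 0b111.  */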
17114 if (tree_fits_uhwi_p (args[1]))
17116 unsigned HOST_WIDE_INT res = 0;
17117 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
17118 unsigned int start = tree_to_uhwi (args[1]);
17119 unsigned int len = (start & 0xff00) >> 8;
17120 start &= 0xff;
17121 if (start >= prec || len == 0)
17122 res = 0;
17123 else if (!tree_fits_uhwi_p (args[0]))
17124 break;
17125 else
17126 res = tree_to_uhwi (args[0]) >> start;
17127 if (len > prec)
17128 len = prec;
17129 if (len < HOST_BITS_PER_WIDE_INT)
17130 res &= (HOST_WIDE_INT_1U << len) - 1;
17131 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17133 break;
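/* Informally, BZHI keeps bits [0, idx) of the first operand and zeroes the
   rest, where idx is the low byte of the second operand; e.g. folding
   _bzhi_u32 (0xff, 3) below yields 7.  */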
17135 case IX86_BUILTIN_BZHI32:
17136 case IX86_BUILTIN_BZHI64:
17137 gcc_assert (n_args == 2);
17138 if (tree_fits_uhwi_p (args[1]))
17140 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
17141 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
17142 return args[0];
17143 if (idx == 0)
17144 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0);
17145 if (!tree_fits_uhwi_p (args[0]))
17146 break;
17147 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
17148 res &= ~(HOST_WIDE_INT_M1U << idx);
17149 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17151 break;
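/* A small worked example of the deposit/extract folds below (informal):
   _pdep_u32 (0b101, 0b11010) scatters the low bits of the source into the
   set positions of the mask, giving 0b10010, and the inverse
   _pext_u32 (0b10010, 0b11010) gathers those bits back into 0b101.  */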
17153 case IX86_BUILTIN_PDEP32:
17154 case IX86_BUILTIN_PDEP64:
17155 gcc_assert (n_args == 2);
17156 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
17158 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
17159 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
17160 unsigned HOST_WIDE_INT res = 0;
17161 unsigned HOST_WIDE_INT m, k = 1;
17162 for (m = 1; m; m <<= 1)
17163 if ((mask & m) != 0)
17165 if ((src & k) != 0)
17166 res |= m;
17167 k <<= 1;
17169 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17171 break;
17173 case IX86_BUILTIN_PEXT32:
17174 case IX86_BUILTIN_PEXT64:
17175 gcc_assert (n_args == 2);
17176 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
17178 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
17179 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
17180 unsigned HOST_WIDE_INT res = 0;
17181 unsigned HOST_WIDE_INT m, k = 1;
17182 for (m = 1; m; m <<= 1)
17183 if ((mask & m) != 0)
17185 if ((src & m) != 0)
17186 res |= k;
17187 k <<= 1;
17189 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17191 break;
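/* Informally, the movmsk family collects the sign bit of each element into
   the low bits of the result; e.g. _mm_movemask_ps on the constant vector
   { -1.0f, 2.0f, -3.0f, 4.0f } folds to 0b0101 == 5 below.  */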
17193 case IX86_BUILTIN_MOVMSKPS:
17194 case IX86_BUILTIN_PMOVMSKB:
17195 case IX86_BUILTIN_MOVMSKPD:
17196 case IX86_BUILTIN_PMOVMSKB128:
17197 case IX86_BUILTIN_MOVMSKPD256:
17198 case IX86_BUILTIN_MOVMSKPS256:
17199 case IX86_BUILTIN_PMOVMSKB256:
17200 gcc_assert (n_args == 1);
17201 if (TREE_CODE (args[0]) == VECTOR_CST)
17203 HOST_WIDE_INT res = 0;
17204 for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
17206 tree e = VECTOR_CST_ELT (args[0], i);
17207 if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
17209 if (wi::neg_p (wi::to_wide (e)))
17210 res |= HOST_WIDE_INT_1 << i;
17212 else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
17214 if (TREE_REAL_CST (e).sign)
17215 res |= HOST_WIDE_INT_1 << i;
17217 else
17218 return NULL_TREE;
17220 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
17222 break;
17224 case IX86_BUILTIN_PSLLD:
17225 case IX86_BUILTIN_PSLLD128:
17226 case IX86_BUILTIN_PSLLD128_MASK:
17227 case IX86_BUILTIN_PSLLD256:
17228 case IX86_BUILTIN_PSLLD256_MASK:
17229 case IX86_BUILTIN_PSLLD512:
17230 case IX86_BUILTIN_PSLLDI:
17231 case IX86_BUILTIN_PSLLDI128:
17232 case IX86_BUILTIN_PSLLDI128_MASK:
17233 case IX86_BUILTIN_PSLLDI256:
17234 case IX86_BUILTIN_PSLLDI256_MASK:
17235 case IX86_BUILTIN_PSLLDI512:
17236 case IX86_BUILTIN_PSLLQ:
17237 case IX86_BUILTIN_PSLLQ128:
17238 case IX86_BUILTIN_PSLLQ128_MASK:
17239 case IX86_BUILTIN_PSLLQ256:
17240 case IX86_BUILTIN_PSLLQ256_MASK:
17241 case IX86_BUILTIN_PSLLQ512:
17242 case IX86_BUILTIN_PSLLQI:
17243 case IX86_BUILTIN_PSLLQI128:
17244 case IX86_BUILTIN_PSLLQI128_MASK:
17245 case IX86_BUILTIN_PSLLQI256:
17246 case IX86_BUILTIN_PSLLQI256_MASK:
17247 case IX86_BUILTIN_PSLLQI512:
17248 case IX86_BUILTIN_PSLLW:
17249 case IX86_BUILTIN_PSLLW128:
17250 case IX86_BUILTIN_PSLLW128_MASK:
17251 case IX86_BUILTIN_PSLLW256:
17252 case IX86_BUILTIN_PSLLW256_MASK:
17253 case IX86_BUILTIN_PSLLW512_MASK:
17254 case IX86_BUILTIN_PSLLWI:
17255 case IX86_BUILTIN_PSLLWI128:
17256 case IX86_BUILTIN_PSLLWI128_MASK:
17257 case IX86_BUILTIN_PSLLWI256:
17258 case IX86_BUILTIN_PSLLWI256_MASK:
17259 case IX86_BUILTIN_PSLLWI512_MASK:
17260 rcode = ASHIFT;
17261 is_vshift = false;
17262 goto do_shift;
17263 case IX86_BUILTIN_PSRAD:
17264 case IX86_BUILTIN_PSRAD128:
17265 case IX86_BUILTIN_PSRAD128_MASK:
17266 case IX86_BUILTIN_PSRAD256:
17267 case IX86_BUILTIN_PSRAD256_MASK:
17268 case IX86_BUILTIN_PSRAD512:
17269 case IX86_BUILTIN_PSRADI:
17270 case IX86_BUILTIN_PSRADI128:
17271 case IX86_BUILTIN_PSRADI128_MASK:
17272 case IX86_BUILTIN_PSRADI256:
17273 case IX86_BUILTIN_PSRADI256_MASK:
17274 case IX86_BUILTIN_PSRADI512:
17275 case IX86_BUILTIN_PSRAQ128_MASK:
17276 case IX86_BUILTIN_PSRAQ256_MASK:
17277 case IX86_BUILTIN_PSRAQ512:
17278 case IX86_BUILTIN_PSRAQI128_MASK:
17279 case IX86_BUILTIN_PSRAQI256_MASK:
17280 case IX86_BUILTIN_PSRAQI512:
17281 case IX86_BUILTIN_PSRAW:
17282 case IX86_BUILTIN_PSRAW128:
17283 case IX86_BUILTIN_PSRAW128_MASK:
17284 case IX86_BUILTIN_PSRAW256:
17285 case IX86_BUILTIN_PSRAW256_MASK:
17286 case IX86_BUILTIN_PSRAW512:
17287 case IX86_BUILTIN_PSRAWI:
17288 case IX86_BUILTIN_PSRAWI128:
17289 case IX86_BUILTIN_PSRAWI128_MASK:
17290 case IX86_BUILTIN_PSRAWI256:
17291 case IX86_BUILTIN_PSRAWI256_MASK:
17292 case IX86_BUILTIN_PSRAWI512:
17293 rcode = ASHIFTRT;
17294 is_vshift = false;
17295 goto do_shift;
17296 case IX86_BUILTIN_PSRLD:
17297 case IX86_BUILTIN_PSRLD128:
17298 case IX86_BUILTIN_PSRLD128_MASK:
17299 case IX86_BUILTIN_PSRLD256:
17300 case IX86_BUILTIN_PSRLD256_MASK:
17301 case IX86_BUILTIN_PSRLD512:
17302 case IX86_BUILTIN_PSRLDI:
17303 case IX86_BUILTIN_PSRLDI128:
17304 case IX86_BUILTIN_PSRLDI128_MASK:
17305 case IX86_BUILTIN_PSRLDI256:
17306 case IX86_BUILTIN_PSRLDI256_MASK:
17307 case IX86_BUILTIN_PSRLDI512:
17308 case IX86_BUILTIN_PSRLQ:
17309 case IX86_BUILTIN_PSRLQ128:
17310 case IX86_BUILTIN_PSRLQ128_MASK:
17311 case IX86_BUILTIN_PSRLQ256:
17312 case IX86_BUILTIN_PSRLQ256_MASK:
17313 case IX86_BUILTIN_PSRLQ512:
17314 case IX86_BUILTIN_PSRLQI:
17315 case IX86_BUILTIN_PSRLQI128:
17316 case IX86_BUILTIN_PSRLQI128_MASK:
17317 case IX86_BUILTIN_PSRLQI256:
17318 case IX86_BUILTIN_PSRLQI256_MASK:
17319 case IX86_BUILTIN_PSRLQI512:
17320 case IX86_BUILTIN_PSRLW:
17321 case IX86_BUILTIN_PSRLW128:
17322 case IX86_BUILTIN_PSRLW128_MASK:
17323 case IX86_BUILTIN_PSRLW256:
17324 case IX86_BUILTIN_PSRLW256_MASK:
17325 case IX86_BUILTIN_PSRLW512:
17326 case IX86_BUILTIN_PSRLWI:
17327 case IX86_BUILTIN_PSRLWI128:
17328 case IX86_BUILTIN_PSRLWI128_MASK:
17329 case IX86_BUILTIN_PSRLWI256:
17330 case IX86_BUILTIN_PSRLWI256_MASK:
17331 case IX86_BUILTIN_PSRLWI512:
17332 rcode = LSHIFTRT;
17333 is_vshift = false;
17334 goto do_shift;
17335 case IX86_BUILTIN_PSLLVV16HI:
17336 case IX86_BUILTIN_PSLLVV16SI:
17337 case IX86_BUILTIN_PSLLVV2DI:
17338 case IX86_BUILTIN_PSLLVV2DI_MASK:
17339 case IX86_BUILTIN_PSLLVV32HI:
17340 case IX86_BUILTIN_PSLLVV4DI:
17341 case IX86_BUILTIN_PSLLVV4DI_MASK:
17342 case IX86_BUILTIN_PSLLVV4SI:
17343 case IX86_BUILTIN_PSLLVV4SI_MASK:
17344 case IX86_BUILTIN_PSLLVV8DI:
17345 case IX86_BUILTIN_PSLLVV8HI:
17346 case IX86_BUILTIN_PSLLVV8SI:
17347 case IX86_BUILTIN_PSLLVV8SI_MASK:
17348 rcode = ASHIFT;
17349 is_vshift = true;
17350 goto do_shift;
17351 case IX86_BUILTIN_PSRAVQ128:
17352 case IX86_BUILTIN_PSRAVQ256:
17353 case IX86_BUILTIN_PSRAVV16HI:
17354 case IX86_BUILTIN_PSRAVV16SI:
17355 case IX86_BUILTIN_PSRAVV32HI:
17356 case IX86_BUILTIN_PSRAVV4SI:
17357 case IX86_BUILTIN_PSRAVV4SI_MASK:
17358 case IX86_BUILTIN_PSRAVV8DI:
17359 case IX86_BUILTIN_PSRAVV8HI:
17360 case IX86_BUILTIN_PSRAVV8SI:
17361 case IX86_BUILTIN_PSRAVV8SI_MASK:
17362 rcode = ASHIFTRT;
17363 is_vshift = true;
17364 goto do_shift;
17365 case IX86_BUILTIN_PSRLVV16HI:
17366 case IX86_BUILTIN_PSRLVV16SI:
17367 case IX86_BUILTIN_PSRLVV2DI:
17368 case IX86_BUILTIN_PSRLVV2DI_MASK:
17369 case IX86_BUILTIN_PSRLVV32HI:
17370 case IX86_BUILTIN_PSRLVV4DI:
17371 case IX86_BUILTIN_PSRLVV4DI_MASK:
17372 case IX86_BUILTIN_PSRLVV4SI:
17373 case IX86_BUILTIN_PSRLVV4SI_MASK:
17374 case IX86_BUILTIN_PSRLVV8DI:
17375 case IX86_BUILTIN_PSRLVV8HI:
17376 case IX86_BUILTIN_PSRLVV8SI:
17377 case IX86_BUILTIN_PSRLVV8SI_MASK:
17378 rcode = LSHIFTRT;
17379 is_vshift = true;
17380 goto do_shift;
17382 do_shift:
17383 gcc_assert (n_args >= 2);
17384 if (TREE_CODE (args[0]) != VECTOR_CST)
17385 break;
17386 mask = HOST_WIDE_INT_M1U;
17387 if (n_args > 2)
17389 /* This is a masked shift. */
17390 if (!tree_fits_uhwi_p (args[n_args - 1])
17391 || TREE_SIDE_EFFECTS (args[n_args - 2]))
17392 break;
17393 mask = tree_to_uhwi (args[n_args - 1]);
17394 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
17395 mask |= HOST_WIDE_INT_M1U << elems;
17396 if (mask != HOST_WIDE_INT_M1U
17397 && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
17398 break;
17399 if (mask == (HOST_WIDE_INT_M1U << elems))
17400 return args[n_args - 2];
17402 if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
17403 break;
17404 if (tree tem = (is_vshift ? integer_one_node
17405 : ix86_vector_shift_count (args[1])))
17407 unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
17408 unsigned HOST_WIDE_INT prec
17409 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
17410 if (count == 0 && mask == HOST_WIDE_INT_M1U)
17411 return args[0];
17412 if (count >= prec)
17414 if (rcode == ASHIFTRT)
17415 count = prec - 1;
17416 else if (mask == HOST_WIDE_INT_M1U)
17417 return build_zero_cst (TREE_TYPE (args[0]));
17419 tree countt = NULL_TREE;
17420 if (!is_vshift)
17422 if (count >= prec)
17423 countt = integer_zero_node;
17424 else
17425 countt = build_int_cst (integer_type_node, count);
17427 tree_vector_builder builder;
17428 if (mask != HOST_WIDE_INT_M1U || is_vshift)
17429 builder.new_vector (TREE_TYPE (args[0]),
17430 TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
17432 else
17433 builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
17434 false);
17435 unsigned int cnt = builder.encoded_nelts ();
17436 for (unsigned int i = 0; i < cnt; ++i)
17438 tree elt = VECTOR_CST_ELT (args[0], i);
17439 if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
17440 return NULL_TREE;
17441 tree type = TREE_TYPE (elt);
17442 if (rcode == LSHIFTRT)
17443 elt = fold_convert (unsigned_type_for (type), elt);
17444 if (is_vshift)
17446 countt = VECTOR_CST_ELT (args[1], i);
17447 if (TREE_CODE (countt) != INTEGER_CST
17448 || TREE_OVERFLOW (countt))
17449 return NULL_TREE;
17450 if (wi::neg_p (wi::to_wide (countt))
17451 || wi::to_widest (countt) >= prec)
17453 if (rcode == ASHIFTRT)
17454 countt = build_int_cst (TREE_TYPE (countt),
17455 prec - 1);
17456 else
17458 elt = build_zero_cst (TREE_TYPE (elt));
17459 countt = build_zero_cst (TREE_TYPE (countt));
17463 else if (count >= prec)
17464 elt = build_zero_cst (TREE_TYPE (elt));
17465 elt = const_binop (rcode == ASHIFT
17466 ? LSHIFT_EXPR : RSHIFT_EXPR,
17467 TREE_TYPE (elt), elt, countt);
17468 if (!elt || TREE_CODE (elt) != INTEGER_CST)
17469 return NULL_TREE;
17470 if (rcode == LSHIFTRT)
17471 elt = fold_convert (type, elt);
17472 if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
17474 elt = VECTOR_CST_ELT (args[n_args - 2], i);
17475 if (TREE_CODE (elt) != INTEGER_CST
17476 || TREE_OVERFLOW (elt))
17477 return NULL_TREE;
17479 builder.quick_push (elt);
17481 return builder.build ();
17483 break;
17485 default:
17486 break;
17490 #ifdef SUBTARGET_FOLD_BUILTIN
17491 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
17492 #endif
17494 return NULL_TREE;
17497 /* Fold a MD builtin (use ix86_fold_builtin for folding into
17498 constant) in GIMPLE. */
17500 bool
17501 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
17503 gimple *stmt = gsi_stmt (*gsi);
17504 tree fndecl = gimple_call_fndecl (stmt);
17505 gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
17506 int n_args = gimple_call_num_args (stmt);
17507 enum ix86_builtins fn_code
17508 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
17509 tree decl = NULL_TREE;
17510 tree arg0, arg1, arg2;
17511 enum rtx_code rcode;
17512 unsigned HOST_WIDE_INT count;
17513 bool is_vshift;
17515 switch (fn_code)
17517 case IX86_BUILTIN_TZCNT32:
17518 decl = builtin_decl_implicit (BUILT_IN_CTZ);
17519 goto fold_tzcnt_lzcnt;
17521 case IX86_BUILTIN_TZCNT64:
17522 decl = builtin_decl_implicit (BUILT_IN_CTZLL);
17523 goto fold_tzcnt_lzcnt;
17525 case IX86_BUILTIN_LZCNT32:
17526 decl = builtin_decl_implicit (BUILT_IN_CLZ);
17527 goto fold_tzcnt_lzcnt;
17529 case IX86_BUILTIN_LZCNT64:
17530 decl = builtin_decl_implicit (BUILT_IN_CLZLL);
17531 goto fold_tzcnt_lzcnt;
17533 fold_tzcnt_lzcnt:
17534 gcc_assert (n_args == 1);
17535 arg0 = gimple_call_arg (stmt, 0);
17536 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
17538 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
17539 /* If arg0 is provably non-zero, optimize into the generic
17540 __builtin_c[tl]z{,ll} function, which the middle-end handles
17541 better. */
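/* E.g. (informal): in a branch guarded by "if (x != 0)", a call to
   __builtin_ia32_tzcnt_u32 (x) can be rewritten as a call to
   __builtin_ctz (x) followed by a cast to the original result type.  */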
17542 if (!expr_not_equal_to (arg0, wi::zero (prec)))
17543 return false;
17545 location_t loc = gimple_location (stmt);
17546 gimple *g = gimple_build_call (decl, 1, arg0);
17547 gimple_set_location (g, loc);
17548 tree lhs = make_ssa_name (integer_type_node);
17549 gimple_call_set_lhs (g, lhs);
17550 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17551 g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
17552 gimple_set_location (g, loc);
17553 gsi_replace (gsi, g, false);
17554 return true;
17556 break;
17558 case IX86_BUILTIN_BZHI32:
17559 case IX86_BUILTIN_BZHI64:
17560 gcc_assert (n_args == 2);
17561 arg1 = gimple_call_arg (stmt, 1);
17562 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
17564 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
17565 arg0 = gimple_call_arg (stmt, 0);
17566 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
17567 break;
17568 location_t loc = gimple_location (stmt);
17569 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17570 gimple_set_location (g, loc);
17571 gsi_replace (gsi, g, false);
17572 return true;
17574 break;
17576 case IX86_BUILTIN_PDEP32:
17577 case IX86_BUILTIN_PDEP64:
17578 case IX86_BUILTIN_PEXT32:
17579 case IX86_BUILTIN_PEXT64:
17580 gcc_assert (n_args == 2);
17581 arg1 = gimple_call_arg (stmt, 1);
17582 if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
17584 location_t loc = gimple_location (stmt);
17585 arg0 = gimple_call_arg (stmt, 0);
17586 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17587 gimple_set_location (g, loc);
17588 gsi_replace (gsi, g, false);
17589 return true;
17591 break;
17593 case IX86_BUILTIN_PSLLD:
17594 case IX86_BUILTIN_PSLLD128:
17595 case IX86_BUILTIN_PSLLD128_MASK:
17596 case IX86_BUILTIN_PSLLD256:
17597 case IX86_BUILTIN_PSLLD256_MASK:
17598 case IX86_BUILTIN_PSLLD512:
17599 case IX86_BUILTIN_PSLLDI:
17600 case IX86_BUILTIN_PSLLDI128:
17601 case IX86_BUILTIN_PSLLDI128_MASK:
17602 case IX86_BUILTIN_PSLLDI256:
17603 case IX86_BUILTIN_PSLLDI256_MASK:
17604 case IX86_BUILTIN_PSLLDI512:
17605 case IX86_BUILTIN_PSLLQ:
17606 case IX86_BUILTIN_PSLLQ128:
17607 case IX86_BUILTIN_PSLLQ128_MASK:
17608 case IX86_BUILTIN_PSLLQ256:
17609 case IX86_BUILTIN_PSLLQ256_MASK:
17610 case IX86_BUILTIN_PSLLQ512:
17611 case IX86_BUILTIN_PSLLQI:
17612 case IX86_BUILTIN_PSLLQI128:
17613 case IX86_BUILTIN_PSLLQI128_MASK:
17614 case IX86_BUILTIN_PSLLQI256:
17615 case IX86_BUILTIN_PSLLQI256_MASK:
17616 case IX86_BUILTIN_PSLLQI512:
17617 case IX86_BUILTIN_PSLLW:
17618 case IX86_BUILTIN_PSLLW128:
17619 case IX86_BUILTIN_PSLLW128_MASK:
17620 case IX86_BUILTIN_PSLLW256:
17621 case IX86_BUILTIN_PSLLW256_MASK:
17622 case IX86_BUILTIN_PSLLW512_MASK:
17623 case IX86_BUILTIN_PSLLWI:
17624 case IX86_BUILTIN_PSLLWI128:
17625 case IX86_BUILTIN_PSLLWI128_MASK:
17626 case IX86_BUILTIN_PSLLWI256:
17627 case IX86_BUILTIN_PSLLWI256_MASK:
17628 case IX86_BUILTIN_PSLLWI512_MASK:
17629 rcode = ASHIFT;
17630 is_vshift = false;
17631 goto do_shift;
17632 case IX86_BUILTIN_PSRAD:
17633 case IX86_BUILTIN_PSRAD128:
17634 case IX86_BUILTIN_PSRAD128_MASK:
17635 case IX86_BUILTIN_PSRAD256:
17636 case IX86_BUILTIN_PSRAD256_MASK:
17637 case IX86_BUILTIN_PSRAD512:
17638 case IX86_BUILTIN_PSRADI:
17639 case IX86_BUILTIN_PSRADI128:
17640 case IX86_BUILTIN_PSRADI128_MASK:
17641 case IX86_BUILTIN_PSRADI256:
17642 case IX86_BUILTIN_PSRADI256_MASK:
17643 case IX86_BUILTIN_PSRADI512:
17644 case IX86_BUILTIN_PSRAQ128_MASK:
17645 case IX86_BUILTIN_PSRAQ256_MASK:
17646 case IX86_BUILTIN_PSRAQ512:
17647 case IX86_BUILTIN_PSRAQI128_MASK:
17648 case IX86_BUILTIN_PSRAQI256_MASK:
17649 case IX86_BUILTIN_PSRAQI512:
17650 case IX86_BUILTIN_PSRAW:
17651 case IX86_BUILTIN_PSRAW128:
17652 case IX86_BUILTIN_PSRAW128_MASK:
17653 case IX86_BUILTIN_PSRAW256:
17654 case IX86_BUILTIN_PSRAW256_MASK:
17655 case IX86_BUILTIN_PSRAW512:
17656 case IX86_BUILTIN_PSRAWI:
17657 case IX86_BUILTIN_PSRAWI128:
17658 case IX86_BUILTIN_PSRAWI128_MASK:
17659 case IX86_BUILTIN_PSRAWI256:
17660 case IX86_BUILTIN_PSRAWI256_MASK:
17661 case IX86_BUILTIN_PSRAWI512:
17662 rcode = ASHIFTRT;
17663 is_vshift = false;
17664 goto do_shift;
17665 case IX86_BUILTIN_PSRLD:
17666 case IX86_BUILTIN_PSRLD128:
17667 case IX86_BUILTIN_PSRLD128_MASK:
17668 case IX86_BUILTIN_PSRLD256:
17669 case IX86_BUILTIN_PSRLD256_MASK:
17670 case IX86_BUILTIN_PSRLD512:
17671 case IX86_BUILTIN_PSRLDI:
17672 case IX86_BUILTIN_PSRLDI128:
17673 case IX86_BUILTIN_PSRLDI128_MASK:
17674 case IX86_BUILTIN_PSRLDI256:
17675 case IX86_BUILTIN_PSRLDI256_MASK:
17676 case IX86_BUILTIN_PSRLDI512:
17677 case IX86_BUILTIN_PSRLQ:
17678 case IX86_BUILTIN_PSRLQ128:
17679 case IX86_BUILTIN_PSRLQ128_MASK:
17680 case IX86_BUILTIN_PSRLQ256:
17681 case IX86_BUILTIN_PSRLQ256_MASK:
17682 case IX86_BUILTIN_PSRLQ512:
17683 case IX86_BUILTIN_PSRLQI:
17684 case IX86_BUILTIN_PSRLQI128:
17685 case IX86_BUILTIN_PSRLQI128_MASK:
17686 case IX86_BUILTIN_PSRLQI256:
17687 case IX86_BUILTIN_PSRLQI256_MASK:
17688 case IX86_BUILTIN_PSRLQI512:
17689 case IX86_BUILTIN_PSRLW:
17690 case IX86_BUILTIN_PSRLW128:
17691 case IX86_BUILTIN_PSRLW128_MASK:
17692 case IX86_BUILTIN_PSRLW256:
17693 case IX86_BUILTIN_PSRLW256_MASK:
17694 case IX86_BUILTIN_PSRLW512:
17695 case IX86_BUILTIN_PSRLWI:
17696 case IX86_BUILTIN_PSRLWI128:
17697 case IX86_BUILTIN_PSRLWI128_MASK:
17698 case IX86_BUILTIN_PSRLWI256:
17699 case IX86_BUILTIN_PSRLWI256_MASK:
17700 case IX86_BUILTIN_PSRLWI512:
17701 rcode = LSHIFTRT;
17702 is_vshift = false;
17703 goto do_shift;
17704 case IX86_BUILTIN_PSLLVV16HI:
17705 case IX86_BUILTIN_PSLLVV16SI:
17706 case IX86_BUILTIN_PSLLVV2DI:
17707 case IX86_BUILTIN_PSLLVV2DI_MASK:
17708 case IX86_BUILTIN_PSLLVV32HI:
17709 case IX86_BUILTIN_PSLLVV4DI:
17710 case IX86_BUILTIN_PSLLVV4DI_MASK:
17711 case IX86_BUILTIN_PSLLVV4SI:
17712 case IX86_BUILTIN_PSLLVV4SI_MASK:
17713 case IX86_BUILTIN_PSLLVV8DI:
17714 case IX86_BUILTIN_PSLLVV8HI:
17715 case IX86_BUILTIN_PSLLVV8SI:
17716 case IX86_BUILTIN_PSLLVV8SI_MASK:
17717 rcode = ASHIFT;
17718 is_vshift = true;
17719 goto do_shift;
17720 case IX86_BUILTIN_PSRAVQ128:
17721 case IX86_BUILTIN_PSRAVQ256:
17722 case IX86_BUILTIN_PSRAVV16HI:
17723 case IX86_BUILTIN_PSRAVV16SI:
17724 case IX86_BUILTIN_PSRAVV32HI:
17725 case IX86_BUILTIN_PSRAVV4SI:
17726 case IX86_BUILTIN_PSRAVV4SI_MASK:
17727 case IX86_BUILTIN_PSRAVV8DI:
17728 case IX86_BUILTIN_PSRAVV8HI:
17729 case IX86_BUILTIN_PSRAVV8SI:
17730 case IX86_BUILTIN_PSRAVV8SI_MASK:
17731 rcode = ASHIFTRT;
17732 is_vshift = true;
17733 goto do_shift;
17734 case IX86_BUILTIN_PSRLVV16HI:
17735 case IX86_BUILTIN_PSRLVV16SI:
17736 case IX86_BUILTIN_PSRLVV2DI:
17737 case IX86_BUILTIN_PSRLVV2DI_MASK:
17738 case IX86_BUILTIN_PSRLVV32HI:
17739 case IX86_BUILTIN_PSRLVV4DI:
17740 case IX86_BUILTIN_PSRLVV4DI_MASK:
17741 case IX86_BUILTIN_PSRLVV4SI:
17742 case IX86_BUILTIN_PSRLVV4SI_MASK:
17743 case IX86_BUILTIN_PSRLVV8DI:
17744 case IX86_BUILTIN_PSRLVV8HI:
17745 case IX86_BUILTIN_PSRLVV8SI:
17746 case IX86_BUILTIN_PSRLVV8SI_MASK:
17747 rcode = LSHIFTRT;
17748 is_vshift = true;
17749 goto do_shift;
17751 do_shift:
17752 gcc_assert (n_args >= 2);
17753 arg0 = gimple_call_arg (stmt, 0);
17754 arg1 = gimple_call_arg (stmt, 1);
17755 if (n_args > 2)
17757 /* This is a masked shift. Only optimize if the mask is all ones. */
17758 tree argl = gimple_call_arg (stmt, n_args - 1);
17759 if (!tree_fits_uhwi_p (argl))
17760 break;
17761 unsigned HOST_WIDE_INT mask = tree_to_uhwi (argl);
17762 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
17763 if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
17764 break;
17766 if (is_vshift)
17768 if (TREE_CODE (arg1) != VECTOR_CST)
17769 break;
17770 count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
17771 if (integer_zerop (arg1))
17772 count = 0;
17773 else if (rcode == ASHIFTRT)
17774 break;
17775 else
17776 for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
17778 tree elt = VECTOR_CST_ELT (arg1, i);
17779 if (!wi::neg_p (wi::to_wide (elt))
17780 && wi::to_widest (elt) < count)
17781 return false;
17784 else
17786 arg1 = ix86_vector_shift_count (arg1);
17787 if (!arg1)
17788 break;
17789 count = tree_to_uhwi (arg1);
17791 if (count == 0)
17793 /* Just return the first argument for shift by 0. */
17794 location_t loc = gimple_location (stmt);
17795 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17796 gimple_set_location (g, loc);
17797 gsi_replace (gsi, g, false);
17798 return true;
17800 if (rcode != ASHIFTRT
17801 && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
17803 /* For shift counts equal to or greater than the precision, the result is
17804 zero, except for arithmetic right shifts. */
17805 location_t loc = gimple_location (stmt);
17806 gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
17807 build_zero_cst (TREE_TYPE (arg0)));
17808 gimple_set_location (g, loc);
17809 gsi_replace (gsi, g, false);
17810 return true;
17812 break;
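/* A brief illustration of the VEC_PERM_EXPR lowering below (informal):
   for __builtin_ia32_shufpd (a, b, 1) the low bit selects a[1] and the
   next bit selects b[0], so the permutation mask built here is { 1, 2 }.  */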
17814 case IX86_BUILTIN_SHUFPD:
17815 arg2 = gimple_call_arg (stmt, 2);
17816 if (TREE_CODE (arg2) == INTEGER_CST)
17818 location_t loc = gimple_location (stmt);
17819 unsigned HOST_WIDE_INT imask = TREE_INT_CST_LOW (arg2);
17820 arg0 = gimple_call_arg (stmt, 0);
17821 arg1 = gimple_call_arg (stmt, 1);
17822 tree itype = long_long_integer_type_node;
17823 tree vtype = build_vector_type (itype, 2); /* V2DI */
17824 tree_vector_builder elts (vtype, 2, 1);
17825 /* Ignore bits other than the lowest 2. */
17826 elts.quick_push (build_int_cst (itype, imask & 1));
17827 imask >>= 1;
17828 elts.quick_push (build_int_cst (itype, 2 + (imask & 1)));
17829 tree omask = elts.build ();
17830 gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
17831 VEC_PERM_EXPR,
17832 arg0, arg1, omask);
17833 gimple_set_location (g, loc);
17834 gsi_replace (gsi, g, false);
17835 return true;
17837 // Do not error yet, the constant could be propagated later?
17838 break;
17840 default:
17841 break;
17844 return false;
17847 /* Handler for an SVML-style interface to
17848 a library with vectorized intrinsics. */
17850 tree
17851 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
17853 char name[20];
17854 tree fntype, new_fndecl, args;
17855 unsigned arity;
17856 const char *bname;
17857 machine_mode el_mode, in_mode;
17858 int n, in_n;
17860 /* The SVML is suitable for unsafe math only. */
17861 if (!flag_unsafe_math_optimizations)
17862 return NULL_TREE;
17864 el_mode = TYPE_MODE (TREE_TYPE (type_out));
17865 n = TYPE_VECTOR_SUBPARTS (type_out);
17866 in_mode = TYPE_MODE (TREE_TYPE (type_in));
17867 in_n = TYPE_VECTOR_SUBPARTS (type_in);
17868 if (el_mode != in_mode
17869 || n != in_n)
17870 return NULL_TREE;
17872 switch (fn)
17874 CASE_CFN_EXP:
17875 CASE_CFN_LOG:
17876 CASE_CFN_LOG10:
17877 CASE_CFN_POW:
17878 CASE_CFN_TANH:
17879 CASE_CFN_TAN:
17880 CASE_CFN_ATAN:
17881 CASE_CFN_ATAN2:
17882 CASE_CFN_ATANH:
17883 CASE_CFN_CBRT:
17884 CASE_CFN_SINH:
17885 CASE_CFN_SIN:
17886 CASE_CFN_ASINH:
17887 CASE_CFN_ASIN:
17888 CASE_CFN_COSH:
17889 CASE_CFN_COS:
17890 CASE_CFN_ACOSH:
17891 CASE_CFN_ACOS:
17892 if ((el_mode != DFmode || n != 2)
17893 && (el_mode != SFmode || n != 4))
17894 return NULL_TREE;
17895 break;
17897 default:
17898 return NULL_TREE;
17901 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
17902 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
17904 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
17905 strcpy (name, "vmlsLn4");
17906 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
17907 strcpy (name, "vmldLn2");
17908 else if (n == 4)
17910 sprintf (name, "vmls%s", bname+10);
17911 name[strlen (name)-1] = '4';
17913 else
17914 sprintf (name, "vmld%s2", bname+10);
17916 /* Convert to uppercase. */
17917 name[4] &= ~0x20;
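/* Example of the name mangling above (illustrative): CFN_SIN with a
   V4SFmode result resolves to BUILT_IN_SINF ("__builtin_sinf"), bname+10
   is "sinf", so the buffer becomes "vmlssinf"; the last character is
   overwritten with '4' and the fifth character is uppercased, giving
   "vmlsSin4".  The DFmode variant is built as "vmldSin2".  */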
17919 arity = 0;
17920 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
17921 arity++;
17923 if (arity == 1)
17924 fntype = build_function_type_list (type_out, type_in, NULL);
17925 else
17926 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
17928 /* Build a function declaration for the vectorized function. */
17929 new_fndecl = build_decl (BUILTINS_LOCATION,
17930 FUNCTION_DECL, get_identifier (name), fntype);
17931 TREE_PUBLIC (new_fndecl) = 1;
17932 DECL_EXTERNAL (new_fndecl) = 1;
17933 DECL_IS_NOVOPS (new_fndecl) = 1;
17934 TREE_READONLY (new_fndecl) = 1;
17936 return new_fndecl;
17939 /* Handler for an ACML-style interface to
17940 a library with vectorized intrinsics. */
17942 tree
17943 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
17945 char name[20] = "__vr.._";
17946 tree fntype, new_fndecl, args;
17947 unsigned arity;
17948 const char *bname;
17949 machine_mode el_mode, in_mode;
17950 int n, in_n;
17952 /* The ACML library is 64-bit only and suitable for unsafe math only,
17953 as it does not correctly support parts of IEEE arithmetic with the
17954 required precision, such as denormals. */
17955 if (!TARGET_64BIT
17956 || !flag_unsafe_math_optimizations)
17957 return NULL_TREE;
17959 el_mode = TYPE_MODE (TREE_TYPE (type_out));
17960 n = TYPE_VECTOR_SUBPARTS (type_out);
17961 in_mode = TYPE_MODE (TREE_TYPE (type_in));
17962 in_n = TYPE_VECTOR_SUBPARTS (type_in);
17963 if (el_mode != in_mode
17964 || n != in_n)
17965 return NULL_TREE;
17967 switch (fn)
17969 CASE_CFN_SIN:
17970 CASE_CFN_COS:
17971 CASE_CFN_EXP:
17972 CASE_CFN_LOG:
17973 CASE_CFN_LOG2:
17974 CASE_CFN_LOG10:
17975 if (el_mode == DFmode && n == 2)
17977 name[4] = 'd';
17978 name[5] = '2';
17980 else if (el_mode == SFmode && n == 4)
17982 name[4] = 's';
17983 name[5] = '4';
17985 else
17986 return NULL_TREE;
17987 break;
17989 default:
17990 return NULL_TREE;
17993 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
17994 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
17995 sprintf (name + 7, "%s", bname+10);
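/* Example of the resulting ACML names (illustrative): for CFN_SIN the
   template "__vr.._" becomes "__vrd2_" for V2DFmode or "__vrs4_" for
   V4SFmode, and appending bname+10 yields "__vrd2_sin" and
   "__vrs4_sinf" respectively.  */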
17997 arity = 0;
17998 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
17999 arity++;
18001 if (arity == 1)
18002 fntype = build_function_type_list (type_out, type_in, NULL);
18003 else
18004 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
18006 /* Build a function declaration for the vectorized function. */
18007 new_fndecl = build_decl (BUILTINS_LOCATION,
18008 FUNCTION_DECL, get_identifier (name), fntype);
18009 TREE_PUBLIC (new_fndecl) = 1;
18010 DECL_EXTERNAL (new_fndecl) = 1;
18011 DECL_IS_NOVOPS (new_fndecl) = 1;
18012 TREE_READONLY (new_fndecl) = 1;
18014 return new_fndecl;
18017 /* Returns a decl of a function that implements scatter store with
18018 vector type VECTYPE, index type INDEX_TYPE and scale SCALE.
18019 Return NULL_TREE if it is not available. */
18021 static tree
18022 ix86_vectorize_builtin_scatter (const_tree vectype,
18023 const_tree index_type, int scale)
18025 bool si;
18026 enum ix86_builtins code;
18028 if (!TARGET_AVX512F)
18029 return NULL_TREE;
18031 if ((TREE_CODE (index_type) != INTEGER_TYPE
18032 && !POINTER_TYPE_P (index_type))
18033 || (TYPE_MODE (index_type) != SImode
18034 && TYPE_MODE (index_type) != DImode))
18035 return NULL_TREE;
18037 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
18038 return NULL_TREE;
18040 /* v*scatter* insn sign extends index to pointer mode. */
18041 if (TYPE_PRECISION (index_type) < POINTER_SIZE
18042 && TYPE_UNSIGNED (index_type))
18043 return NULL_TREE;
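/* Rationale by example: with a 32-bit unsigned index type on a 64-bit
   target, an index of 0x80000000 would be sign-extended by the
   v*scatter* instruction to a negative offset, so such index types are
   rejected above.  */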
18045 /* Scale can be 1, 2, 4 or 8. */
18046 if (scale <= 0
18047 || scale > 8
18048 || (scale & (scale - 1)) != 0)
18049 return NULL_TREE;
18051 si = TYPE_MODE (index_type) == SImode;
18052 switch (TYPE_MODE (vectype))
18054 case E_V8DFmode:
18055 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
18056 break;
18057 case E_V8DImode:
18058 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
18059 break;
18060 case E_V16SFmode:
18061 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
18062 break;
18063 case E_V16SImode:
18064 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
18065 break;
18066 case E_V4DFmode:
18067 if (TARGET_AVX512VL)
18068 code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
18069 else
18070 return NULL_TREE;
18071 break;
18072 case E_V4DImode:
18073 if (TARGET_AVX512VL)
18074 code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
18075 else
18076 return NULL_TREE;
18077 break;
18078 case E_V8SFmode:
18079 if (TARGET_AVX512VL)
18080 code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
18081 else
18082 return NULL_TREE;
18083 break;
18084 case E_V8SImode:
18085 if (TARGET_AVX512VL)
18086 code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
18087 else
18088 return NULL_TREE;
18089 break;
18090 case E_V2DFmode:
18091 if (TARGET_AVX512VL)
18092 code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
18093 else
18094 return NULL_TREE;
18095 break;
18096 case E_V2DImode:
18097 if (TARGET_AVX512VL)
18098 code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
18099 else
18100 return NULL_TREE;
18101 break;
18102 case E_V4SFmode:
18103 if (TARGET_AVX512VL)
18104 code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
18105 else
18106 return NULL_TREE;
18107 break;
18108 case E_V4SImode:
18109 if (TARGET_AVX512VL)
18110 code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
18111 else
18112 return NULL_TREE;
18113 break;
18114 default:
18115 return NULL_TREE;
18118 return get_ix86_builtin (code);
18121 /* Return true if it is safe to use the rsqrt optabs to optimize
18122 1.0/sqrt. */
18124 static bool
18125 use_rsqrt_p ()
18127 return (TARGET_SSE && TARGET_SSE_MATH
18128 && flag_finite_math_only
18129 && !flag_trapping_math
18130 && flag_unsafe_math_optimizations);
18133 /* Helper for avx_vpermilps256_operand et al. This is also used by
18134 the expansion functions to turn the parallel back into a mask.
18135 The return value is 0 for no match and imm8 + 1 for a match. */
18137 int
18138 avx_vpermilp_parallel (rtx par, machine_mode mode)
18140 unsigned i, nelt = GET_MODE_NUNITS (mode);
18141 unsigned mask = 0;
18142 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
18144 if (XVECLEN (par, 0) != (int) nelt)
18145 return 0;
18147 /* Validate that all of the elements are constants, and not totally
18148 out of range. Copy the data into an integral array to make the
18149 subsequent checks easier. */
18150 for (i = 0; i < nelt; ++i)
18152 rtx er = XVECEXP (par, 0, i);
18153 unsigned HOST_WIDE_INT ei;
18155 if (!CONST_INT_P (er))
18156 return 0;
18157 ei = INTVAL (er);
18158 if (ei >= nelt)
18159 return 0;
18160 ipar[i] = ei;
18163 switch (mode)
18165 case E_V8DFmode:
18166 /* In the 512-bit DFmode case, we can only move elements within
18167 a 128-bit lane. First fill the second part of the mask,
18168 then fallthru. */
18169 for (i = 4; i < 6; ++i)
18171 if (ipar[i] < 4 || ipar[i] >= 6)
18172 return 0;
18173 mask |= (ipar[i] - 4) << i;
18175 for (i = 6; i < 8; ++i)
18177 if (ipar[i] < 6)
18178 return 0;
18179 mask |= (ipar[i] - 6) << i;
18181 /* FALLTHRU */
18183 case E_V4DFmode:
18184 /* In the 256-bit DFmode case, we can only move elements within
18185 a 128-bit lane. */
18186 for (i = 0; i < 2; ++i)
18188 if (ipar[i] >= 2)
18189 return 0;
18190 mask |= ipar[i] << i;
18192 for (i = 2; i < 4; ++i)
18194 if (ipar[i] < 2)
18195 return 0;
18196 mask |= (ipar[i] - 2) << i;
18198 break;
18200 case E_V16SFmode:
18201 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
18202 must mirror the permutation in the lower 256 bits. */
18203 for (i = 0; i < 8; ++i)
18204 if (ipar[i] + 8 != ipar[i + 8])
18205 return 0;
18206 /* FALLTHRU */
18208 case E_V8SFmode:
18209 /* In the 256-bit SFmode case, we have full freedom of
18210 movement within the low 128-bit lane, but the high 128-bit
18211 lane must mirror the exact same pattern. */
18212 for (i = 0; i < 4; ++i)
18213 if (ipar[i] + 4 != ipar[i + 4])
18214 return 0;
18215 nelt = 4;
18216 /* FALLTHRU */
18218 case E_V2DFmode:
18219 case E_V4SFmode:
18220 /* In the 128-bit case, we have full freedom in the placement of
18221 the elements from the source operand. */
18222 for (i = 0; i < nelt; ++i)
18223 mask |= ipar[i] << (i * (nelt / 2));
18224 break;
18226 default:
18227 gcc_unreachable ();
18230 /* Make sure success has a non-zero value by adding one. */
18231 return mask + 1;
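/* Worked example (illustrative): for a V8SFmode parallel
   (0 3 2 1 4 7 6 5) the upper lane mirrors the lower one, so only the
   first four indices are encoded, two bits each:
   0 | 3<<2 | 2<<4 | 1<<6 = 0x6c, and 0x6d (imm8 + 1) is returned; the
   caller subtracts one to recover the vpermilps immediate.  */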
18234 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
18235 the expansion functions to turn the parallel back into a mask.
18236 The return value is 0 for no match and imm8 + 1 for a match. */
18238 int
18239 avx_vperm2f128_parallel (rtx par, machine_mode mode)
18241 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
18242 unsigned mask = 0;
18243 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
18245 if (XVECLEN (par, 0) != (int) nelt)
18246 return 0;
18248 /* Validate that all of the elements are constants, and not totally
18249 out of range. Copy the data into an integral array to make the
18250 subsequent checks easier. */
18251 for (i = 0; i < nelt; ++i)
18253 rtx er = XVECEXP (par, 0, i);
18254 unsigned HOST_WIDE_INT ei;
18256 if (!CONST_INT_P (er))
18257 return 0;
18258 ei = INTVAL (er);
18259 if (ei >= 2 * nelt)
18260 return 0;
18261 ipar[i] = ei;
18264 /* Validate that each half of the permute consists of consecutive elements. */
18265 for (i = 0; i < nelt2 - 1; ++i)
18266 if (ipar[i] + 1 != ipar[i + 1])
18267 return 0;
18268 for (i = nelt2; i < nelt - 1; ++i)
18269 if (ipar[i] + 1 != ipar[i + 1])
18270 return 0;
18272 /* Reconstruct the mask. */
18273 for (i = 0; i < 2; ++i)
18275 unsigned e = ipar[i * nelt2];
18276 if (e % nelt2)
18277 return 0;
18278 e /= nelt2;
18279 mask |= e << (i * 4);
18282 /* Make sure success has a non-zero value by adding one. */
18283 return mask + 1;
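/* Worked example (illustrative): a V4DFmode parallel (2 3 4 5) selects
   the high 128-bit half of the first operand and the low half of the
   second.  Both halves are consecutive and start on a half boundary, so
   mask = (2/2) | (4/2) << 4 = 0x21 and 0x22 (imm8 + 1) is returned.  */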
18286 /* Return a register priority for hard reg REGNO. */
18287 static int
18288 ix86_register_priority (int hard_regno)
18290 /* ebp and r13 as a base always want a displacement, and r12 as a
18291 base always wants an index.  So discourage their use in an
18292 address. */
18293 if (hard_regno == R12_REG || hard_regno == R13_REG)
18294 return 0;
18295 if (hard_regno == BP_REG)
18296 return 1;
18297 /* New x86-64 int registers result in bigger code size. Discourage
18298 them. */
18299 if (IN_RANGE (hard_regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
18300 return 2;
18301 /* New x86-64 SSE registers result in bigger code size. Discourage
18302 them. */
18303 if (IN_RANGE (hard_regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
18304 return 2;
18305 if (IN_RANGE (hard_regno, FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG))
18306 return 1;
18307 /* Usage of AX register results in smaller code. Prefer it. */
18308 if (hard_regno == AX_REG)
18309 return 4;
18310 return 3;
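/* Resulting preference order, highest first (summary of the checks
   above): eax (4); anything not singled out below (3); r8-r15 other
   than r12/r13, and xmm8-xmm15 (2); ebp and xmm16-xmm31 (1); r12 and
   r13 (0).  */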
18313 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
18315 Put float CONST_DOUBLE in the constant pool instead of fp regs.
18316 QImode must go into class Q_REGS.
18317 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
18318 movdf to do mem-to-mem moves through integer regs. */
18320 static reg_class_t
18321 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
18323 machine_mode mode = GET_MODE (x);
18325 /* We're only allowed to return a subclass of CLASS. Many of the
18326 following checks fail for NO_REGS, so eliminate that early. */
18327 if (regclass == NO_REGS)
18328 return NO_REGS;
18330 /* All classes can load zeros. */
18331 if (x == CONST0_RTX (mode))
18332 return regclass;
18334 /* Force constants into memory if we are loading a (nonzero) constant into
18335 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
18336 instructions to load from a constant. */
18337 if (CONSTANT_P (x)
18338 && (MAYBE_MMX_CLASS_P (regclass)
18339 || MAYBE_SSE_CLASS_P (regclass)
18340 || MAYBE_MASK_CLASS_P (regclass)))
18341 return NO_REGS;
18343 /* Floating-point constants need more complex checks. */
18344 if (CONST_DOUBLE_P (x))
18346 /* General regs can load everything. */
18347 if (INTEGER_CLASS_P (regclass))
18348 return regclass;
18350 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18351 zero above. We only want to wind up preferring 80387 registers if
18352 we plan on doing computation with them. */
18353 if (IS_STACK_MODE (mode)
18354 && standard_80387_constant_p (x) > 0)
18356 /* Limit class to FP regs. */
18357 if (FLOAT_CLASS_P (regclass))
18358 return FLOAT_REGS;
18361 return NO_REGS;
18364 /* Prefer SSE regs only, if we can use them for math. */
18365 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
18366 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
18368 /* Generally when we see PLUS here, it's the function invariant
18369 (plus soft-fp const_int), which can only be computed into general
18370 regs. */
18371 if (GET_CODE (x) == PLUS)
18372 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
18374 /* QImode constants are easy to load, but non-constant QImode data
18375 must go into Q_REGS. */
18376 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18378 if (Q_CLASS_P (regclass))
18379 return regclass;
18380 else if (reg_class_subset_p (Q_REGS, regclass))
18381 return Q_REGS;
18382 else
18383 return NO_REGS;
18386 return regclass;
18389 /* Discourage putting floating-point values in SSE registers unless
18390 SSE math is being used, and likewise for the 387 registers. */
18391 static reg_class_t
18392 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
18394 /* Restrict the output reload class to the register bank that we are doing
18395 math on. If we cannot return a subset of REGCLASS, reject this
18396 alternative: reload will then fall back to its original choice. */
18397 machine_mode mode = GET_MODE (x);
18398 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
18399 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
18401 if (IS_STACK_MODE (mode))
18402 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
18404 return regclass;
18407 static reg_class_t
18408 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
18409 machine_mode mode, secondary_reload_info *sri)
18411 /* Double-word spills from general registers to non-offsettable memory
18412 references (zero-extended addresses) require special handling. */
18413 if (TARGET_64BIT
18414 && MEM_P (x)
18415 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
18416 && INTEGER_CLASS_P (rclass)
18417 && !offsettable_memref_p (x))
18419 sri->icode = (in_p
18420 ? CODE_FOR_reload_noff_load
18421 : CODE_FOR_reload_noff_store);
18422 /* Add the cost of moving address to a temporary. */
18423 sri->extra_cost = 1;
18425 return NO_REGS;
18428 /* QImode spills from non-QI registers require
18429 an intermediate register on 32-bit targets. */
18430 if (mode == QImode
18431 && ((!TARGET_64BIT && !in_p
18432 && INTEGER_CLASS_P (rclass)
18433 && MAYBE_NON_Q_CLASS_P (rclass))
18434 || (!TARGET_AVX512DQ
18435 && MAYBE_MASK_CLASS_P (rclass))))
18437 int regno = true_regnum (x);
18439 /* Return Q_REGS if the operand is in memory. */
18440 if (regno == -1)
18441 return Q_REGS;
18443 return NO_REGS;
18446 /* This condition handles the corner case where an expression involving
18447 pointers gets vectorized. We're trying to use the address of a
18448 stack slot as a vector initializer.
18450 (set (reg:V2DI 74 [ vect_cst_.2 ])
18451 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
18453 Eventually frame gets turned into sp+offset like this:
18455 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18456 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18457 (const_int 392 [0x188]))))
18459 That later gets turned into:
18461 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18462 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18463 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
18465 We'll have the following reload recorded:
18467 Reload 0: reload_in (DI) =
18468 (plus:DI (reg/f:DI 7 sp)
18469 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
18470 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18471 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
18472 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
18473 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18474 reload_reg_rtx: (reg:V2DI 22 xmm1)
18476 Which isn't going to work since SSE instructions can't handle scalar
18477 additions. Returning GENERAL_REGS forces the addition into an integer
18478 register, and reload can handle subsequent reloads without problems. */
18480 if (in_p && GET_CODE (x) == PLUS
18481 && SSE_CLASS_P (rclass)
18482 && SCALAR_INT_MODE_P (mode))
18483 return GENERAL_REGS;
18485 return NO_REGS;
18488 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
18490 static bool
18491 ix86_class_likely_spilled_p (reg_class_t rclass)
18493 switch (rclass)
18495 case AREG:
18496 case DREG:
18497 case CREG:
18498 case BREG:
18499 case AD_REGS:
18500 case SIREG:
18501 case DIREG:
18502 case SSE_FIRST_REG:
18503 case FP_TOP_REG:
18504 case FP_SECOND_REG:
18505 return true;
18507 default:
18508 break;
18511 return false;
18514 /* If we are copying between registers from different register sets
18515 (e.g. FP and integer), we may need a memory location.
18517 The function can't work reliably when one of the CLASSES is a class
18518 containing registers from multiple sets. We avoid this by never combining
18519 different sets in a single alternative in the machine description.
18520 Ensure that this constraint holds to avoid unexpected surprises.
18522 When STRICT is false, we are being called from REGISTER_MOVE_COST,
18523 so do not enforce these sanity checks.
18525 To optimize register_move_cost performance, define inline variant. */
18527 static inline bool
18528 inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
18529 reg_class_t class2, int strict)
18531 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
18532 return false;
18534 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18535 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18536 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18537 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18538 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18539 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
18540 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
18541 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
18543 gcc_assert (!strict || lra_in_progress);
18544 return true;
18547 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18548 return true;
18550 /* ??? This is a lie. We do have moves between mmx/general, and for
18551 mmx/sse2. But by saying we need secondary memory we discourage the
18552 register allocator from using the mmx registers unless needed. */
18553 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18554 return true;
18556 /* Between mask and general, we have moves no larger than word size. */
18557 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
18559 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
18560 || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18561 return true;
18564 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18566 /* SSE1 doesn't have any direct moves from other classes. */
18567 if (!TARGET_SSE2)
18568 return true;
18570 /* Between SSE and general, we have moves no larger than word size. */
18571 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
18572 || GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode)
18573 || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18574 return true;
18576 /* If the target says that inter-unit moves are more expensive
18577 than moving through memory, then don't generate them. */
18578 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
18579 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
18580 return true;
18583 return false;
18586 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
18588 static bool
18589 ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
18590 reg_class_t class2)
18592 return inline_secondary_memory_needed (mode, class1, class2, true);
18595 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
18597 get_secondary_mem widens integral modes to BITS_PER_WORD.
18598 There is no need to emit a full 64-bit move on 64-bit targets
18599 for integral modes that can be moved using a 32-bit move. */
18601 static machine_mode
18602 ix86_secondary_memory_needed_mode (machine_mode mode)
18604 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
18605 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
18606 return mode;
18609 /* Implement the TARGET_CLASS_MAX_NREGS hook.
18611 On the 80386, this is the size of MODE in words,
18612 except in the FP regs, where a single reg is always enough. */
18614 static unsigned char
18615 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
18617 if (MAYBE_INTEGER_CLASS_P (rclass))
18619 if (mode == XFmode)
18620 return (TARGET_64BIT ? 2 : 3);
18621 else if (mode == XCmode)
18622 return (TARGET_64BIT ? 4 : 6);
18623 else
18624 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
18626 else
18628 if (COMPLEX_MODE_P (mode))
18629 return 2;
18630 else
18631 return 1;
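/* Example values (illustrative): XFmode held in integer registers needs
   3 registers on ia32 and 2 on x86-64, XCmode twice that, and DImode on
   ia32 needs CEIL (8, 4) = 2; any mode held in x87/SSE/mask registers
   needs a single register, or two for a complex mode.  */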
18635 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
18637 static bool
18638 ix86_can_change_mode_class (machine_mode from, machine_mode to,
18639 reg_class_t regclass)
18641 if (from == to)
18642 return true;
18644 /* x87 registers can't do subreg at all, as all values are reformatted
18645 to extended precision. */
18646 if (MAYBE_FLOAT_CLASS_P (regclass))
18647 return false;
18649 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
18651 /* Vector registers do not support QI or HImode loads. If we don't
18652 disallow a change to these modes, reload will assume it's ok to
18653 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18654 the vec_dupv4hi pattern. */
18655 if (GET_MODE_SIZE (from) < 4)
18656 return false;
18659 return true;
18662 /* Return index of MODE in the sse load/store tables. */
18664 static inline int
18665 sse_store_index (machine_mode mode)
18667 switch (GET_MODE_SIZE (mode))
18669 case 4:
18670 return 0;
18671 case 8:
18672 return 1;
18673 case 16:
18674 return 2;
18675 case 32:
18676 return 3;
18677 case 64:
18678 return 4;
18679 default:
18680 return -1;
18684 /* Return the cost of moving data of mode M between a
18685 register and memory. A value of 2 is the default; this cost is
18686 relative to those in `REGISTER_MOVE_COST'.
18688 This function is used extensively by register_move_cost that is used to
18689 build tables at startup. Make it inline in this case.
18690 When IN is 2, return maximum of in and out move cost.
18692 If moving between registers and memory is more expensive than
18693 between two registers, you should define this macro to express the
18694 relative cost.
18696 Also model the increased cost of moving QImode registers in
18697 non-Q_REGS classes. */
18699 static inline int
18700 inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
18702 int cost;
18703 if (FLOAT_CLASS_P (regclass))
18705 int index;
18706 switch (mode)
18708 case E_SFmode:
18709 index = 0;
18710 break;
18711 case E_DFmode:
18712 index = 1;
18713 break;
18714 case E_XFmode:
18715 index = 2;
18716 break;
18717 default:
18718 return 100;
18720 if (in == 2)
18721 return MAX (ix86_cost->hard_register.fp_load [index],
18722 ix86_cost->hard_register.fp_store [index]);
18723 return in ? ix86_cost->hard_register.fp_load [index]
18724 : ix86_cost->hard_register.fp_store [index];
18726 if (SSE_CLASS_P (regclass))
18728 int index = sse_store_index (mode);
18729 if (index == -1)
18730 return 100;
18731 if (in == 2)
18732 return MAX (ix86_cost->hard_register.sse_load [index],
18733 ix86_cost->hard_register.sse_store [index]);
18734 return in ? ix86_cost->hard_register.sse_load [index]
18735 : ix86_cost->hard_register.sse_store [index];
18737 if (MMX_CLASS_P (regclass))
18739 int index;
18740 switch (GET_MODE_SIZE (mode))
18742 case 4:
18743 index = 0;
18744 break;
18745 case 8:
18746 index = 1;
18747 break;
18748 default:
18749 return 100;
18751 if (in == 2)
18752 return MAX (ix86_cost->hard_register.mmx_load [index],
18753 ix86_cost->hard_register.mmx_store [index]);
18754 return in ? ix86_cost->hard_register.mmx_load [index]
18755 : ix86_cost->hard_register.mmx_store [index];
18757 switch (GET_MODE_SIZE (mode))
18759 case 1:
18760 if (Q_CLASS_P (regclass) || TARGET_64BIT)
18762 if (!in)
18763 return ix86_cost->hard_register.int_store[0];
18764 if (TARGET_PARTIAL_REG_DEPENDENCY
18765 && optimize_function_for_speed_p (cfun))
18766 cost = ix86_cost->hard_register.movzbl_load;
18767 else
18768 cost = ix86_cost->hard_register.int_load[0];
18769 if (in == 2)
18770 return MAX (cost, ix86_cost->hard_register.int_store[0]);
18771 return cost;
18773 else
18775 if (in == 2)
18776 return MAX (ix86_cost->hard_register.movzbl_load,
18777 ix86_cost->hard_register.int_store[0] + 4);
18778 if (in)
18779 return ix86_cost->hard_register.movzbl_load;
18780 else
18781 return ix86_cost->hard_register.int_store[0] + 4;
18783 break;
18784 case 2:
18785 if (in == 2)
18786 return MAX (ix86_cost->hard_register.int_load[1],
18787 ix86_cost->hard_register.int_store[1]);
18788 return in ? ix86_cost->hard_register.int_load[1]
18789 : ix86_cost->hard_register.int_store[1];
18790 default:
18791 if (in == 2)
18792 cost = MAX (ix86_cost->hard_register.int_load[2],
18793 ix86_cost->hard_register.int_store[2]);
18794 else if (in)
18795 cost = ix86_cost->hard_register.int_load[2];
18796 else
18797 cost = ix86_cost->hard_register.int_store[2];
18798 /* Multiply with the number of GPR moves needed. */
18799 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
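/* Example (illustrative): a DImode value moved between GENERAL_REGS and
   memory on ia32 takes CEIL (8, 4) = 2 word-sized moves, so the
   int_load[2]/int_store[2] cost is doubled.  */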
18803 static int
18804 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
18806 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
18810 /* Return the cost of moving data from a register in class CLASS1 to
18811 one in class CLASS2.
18813 It is not required that the cost always equal 2 when FROM is the same as TO;
18814 on some machines it is expensive to move between registers if they are not
18815 general registers. */
18817 static int
18818 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
18819 reg_class_t class2_i)
18821 enum reg_class class1 = (enum reg_class) class1_i;
18822 enum reg_class class2 = (enum reg_class) class2_i;
18824 /* In case we require secondary memory, compute cost of the store followed
18825 by load. In order to avoid bad register allocation choices, we need
18826 this to be *at least* as high as the symmetric MEMORY_MOVE_COST.
18828 if (inline_secondary_memory_needed (mode, class1, class2, false))
18830 int cost = 1;
18832 cost += inline_memory_move_cost (mode, class1, 2);
18833 cost += inline_memory_move_cost (mode, class2, 2);
18835 /* In the case of copying from a general purpose register we may emit
18836 multiple stores followed by a single load, causing a memory size
18837 mismatch stall. Count this as an arbitrarily high cost of 20. */
18838 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
18839 && TARGET_MEMORY_MISMATCH_STALL
18840 && targetm.class_max_nregs (class1, mode)
18841 > targetm.class_max_nregs (class2, mode))
18842 cost += 20;
18844 /* In the case of FP/MMX moves, the registers actually overlap, and we
18845 have to switch modes in order to treat them differently. */
18846 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18847 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18848 cost += 20;
18850 return cost;
18853 /* Moves between MMX and non-MMX units require secondary memory. */
18854 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18855 gcc_unreachable ();
18857 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18858 return (SSE_CLASS_P (class1)
18859 ? ix86_cost->hard_register.sse_to_integer
18860 : ix86_cost->hard_register.integer_to_sse);
18862 if (MAYBE_FLOAT_CLASS_P (class1))
18863 return ix86_cost->hard_register.fp_move;
18864 if (MAYBE_SSE_CLASS_P (class1))
18866 if (GET_MODE_BITSIZE (mode) <= 128)
18867 return ix86_cost->hard_register.xmm_move;
18868 if (GET_MODE_BITSIZE (mode) <= 256)
18869 return ix86_cost->hard_register.ymm_move;
18870 return ix86_cost->hard_register.zmm_move;
18872 if (MAYBE_MMX_CLASS_P (class1))
18873 return ix86_cost->hard_register.mmx_move;
18874 return 2;
18877 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
18878 words of a value of mode MODE but can be less for certain modes in
18879 special long registers.
18881 Actually there are no two-word move instructions for consecutive
18882 registers, and only registers 0-3 may have mov byte instructions
18883 applied to them. */
18885 static unsigned int
18886 ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
18888 if (GENERAL_REGNO_P (regno))
18890 if (mode == XFmode)
18891 return TARGET_64BIT ? 2 : 3;
18892 if (mode == XCmode)
18893 return TARGET_64BIT ? 4 : 6;
18894 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
18896 if (COMPLEX_MODE_P (mode))
18897 return 2;
18898 /* Register pair for mask registers. */
18899 if (mode == P2QImode || mode == P2HImode)
18900 return 2;
18901 if (mode == V64SFmode || mode == V64SImode)
18902 return 4;
18903 return 1;
18906 /* Implement REGMODE_NATURAL_SIZE(MODE). */
18907 unsigned int
18908 ix86_regmode_natural_size (machine_mode mode)
18910 if (mode == P2HImode || mode == P2QImode)
18911 return GET_MODE_SIZE (mode) / 2;
18912 return UNITS_PER_WORD;
18915 /* Implement TARGET_HARD_REGNO_MODE_OK. */
18917 static bool
18918 ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
18920 /* The flags register, and only the flags register, can hold CCmode values. */
18921 if (CC_REGNO_P (regno))
18922 return GET_MODE_CLASS (mode) == MODE_CC;
18923 if (GET_MODE_CLASS (mode) == MODE_CC
18924 || GET_MODE_CLASS (mode) == MODE_RANDOM)
18925 return false;
18926 if (STACK_REGNO_P (regno))
18927 return VALID_FP_MODE_P (mode);
18928 if (MASK_REGNO_P (regno))
18930 /* Register pair only starts at even register number. */
18931 if ((mode == P2QImode || mode == P2HImode))
18932 return MASK_PAIR_REGNO_P(regno);
18934 return (VALID_MASK_REG_MODE (mode)
18935 || (TARGET_AVX512BW
18936 && VALID_MASK_AVX512BW_MODE (mode)));
18939 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
18940 return false;
18942 if (SSE_REGNO_P (regno))
18944 /* We implement the move patterns for all vector modes into and
18945 out of SSE registers, even when no operation instructions
18946 are available. */
18948 /* For AVX-512 we allow, regardless of regno:
18949 - XI mode
18950 - any of 512-bit wide vector mode
18951 - any scalar mode. */
18952 if (TARGET_AVX512F
18953 && (mode == XImode
18954 || VALID_AVX512F_REG_MODE (mode)
18955 || VALID_AVX512F_SCALAR_MODE (mode)))
18956 return true;
18958 /* For AVX-5124FMAPS or AVX-5124VNNIW
18959 allow V64SF and V64SI modes for special regnos. */
18960 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
18961 && (mode == V64SFmode || mode == V64SImode)
18962 && MOD4_SSE_REGNO_P (regno))
18963 return true;
18965 /* TODO check for QI/HI scalars. */
18966 /* AVX512VL allows SSE registers 16+ (xmm16-xmm31) for 128/256-bit modes. */
18967 if (TARGET_AVX512VL
18968 && (mode == OImode
18969 || mode == TImode
18970 || VALID_AVX256_REG_MODE (mode)
18971 || VALID_AVX512VL_128_REG_MODE (mode)))
18972 return true;
18974 /* xmm16-xmm31 are only available for AVX-512. */
18975 if (EXT_REX_SSE_REGNO_P (regno))
18976 return false;
18978 /* OImode and AVX modes are available only when AVX is enabled. */
18979 return ((TARGET_AVX
18980 && VALID_AVX256_REG_OR_OI_MODE (mode))
18981 || VALID_SSE_REG_MODE (mode)
18982 || VALID_SSE2_REG_MODE (mode)
18983 || VALID_MMX_REG_MODE (mode)
18984 || VALID_MMX_REG_MODE_3DNOW (mode));
18986 if (MMX_REGNO_P (regno))
18988 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18989 so if the register is available at all, then we can move data of
18990 the given mode into or out of it. */
18991 return (VALID_MMX_REG_MODE (mode)
18992 || VALID_MMX_REG_MODE_3DNOW (mode));
18995 if (mode == QImode)
18997 /* Take care for QImode values - they can be in non-QI regs,
18998 but then they do cause partial register stalls. */
18999 if (ANY_QI_REGNO_P (regno))
19000 return true;
19001 if (!TARGET_PARTIAL_REG_STALL)
19002 return true;
19003 /* LRA checks if the hard register is OK for the given mode.
19004 QImode values can live in non-QI regs, so we allow all
19005 registers here. */
19006 if (lra_in_progress)
19007 return true;
19008 return !can_create_pseudo_p ();
19010 /* We handle both integer and floats in the general purpose registers. */
19011 else if (VALID_INT_MODE_P (mode))
19012 return true;
19013 else if (VALID_FP_MODE_P (mode))
19014 return true;
19015 else if (VALID_DFP_MODE_P (mode))
19016 return true;
19017 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
19018 on to use that value in smaller contexts, this can easily force a
19019 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
19020 supporting DImode, allow it. */
19021 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
19022 return true;
19024 return false;
19027 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
19028 saves SSE registers across calls is Win64 (thus no need to check the
19029 current ABI here), and with AVX enabled Win64 only guarantees that
19030 the low 16 bytes are saved. */
19032 static bool
19033 ix86_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
19034 machine_mode mode)
19036 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
19039 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
19040 tieable integer mode. */
19042 static bool
19043 ix86_tieable_integer_mode_p (machine_mode mode)
19045 switch (mode)
19047 case E_HImode:
19048 case E_SImode:
19049 return true;
19051 case E_QImode:
19052 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
19054 case E_DImode:
19055 return TARGET_64BIT;
19057 default:
19058 return false;
19062 /* Implement TARGET_MODES_TIEABLE_P.
19064 Return true if MODE1 is accessible in a register that can hold MODE2
19065 without copying. That is, all register classes that can hold MODE2
19066 can also hold MODE1. */
19068 static bool
19069 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
19071 if (mode1 == mode2)
19072 return true;
19074 if (ix86_tieable_integer_mode_p (mode1)
19075 && ix86_tieable_integer_mode_p (mode2))
19076 return true;
19078 /* MODE2 being XFmode implies fp stack or general regs, which means we
19079 can tie any smaller floating point modes to it. Note that we do not
19080 tie this with TFmode. */
19081 if (mode2 == XFmode)
19082 return mode1 == SFmode || mode1 == DFmode;
19084 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
19085 that we can tie it with SFmode. */
19086 if (mode2 == DFmode)
19087 return mode1 == SFmode;
19089 /* If MODE2 is only appropriate for an SSE register, then tie with
19090 any other mode acceptable to SSE registers. */
19091 if (GET_MODE_SIZE (mode2) == 64
19092 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19093 return (GET_MODE_SIZE (mode1) == 64
19094 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19095 if (GET_MODE_SIZE (mode2) == 32
19096 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19097 return (GET_MODE_SIZE (mode1) == 32
19098 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19099 if (GET_MODE_SIZE (mode2) == 16
19100 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19101 return (GET_MODE_SIZE (mode1) == 16
19102 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19104 /* If MODE2 is appropriate for an MMX register, then tie
19105 with any other mode acceptable to MMX registers. */
19106 if (GET_MODE_SIZE (mode2) == 8
19107 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
19108 return (GET_MODE_SIZE (mode1) == 8
19109 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
19111 return false;
19114 /* Return the cost of moving between two registers of mode MODE. */
19116 static int
19117 ix86_set_reg_reg_cost (machine_mode mode)
19119 unsigned int units = UNITS_PER_WORD;
19121 switch (GET_MODE_CLASS (mode))
19123 default:
19124 break;
19126 case MODE_CC:
19127 units = GET_MODE_SIZE (CCmode);
19128 break;
19130 case MODE_FLOAT:
19131 if ((TARGET_SSE && mode == TFmode)
19132 || (TARGET_80387 && mode == XFmode)
19133 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
19134 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
19135 units = GET_MODE_SIZE (mode);
19136 break;
19138 case MODE_COMPLEX_FLOAT:
19139 if ((TARGET_SSE && mode == TCmode)
19140 || (TARGET_80387 && mode == XCmode)
19141 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
19142 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
19143 units = GET_MODE_SIZE (mode);
19144 break;
19146 case MODE_VECTOR_INT:
19147 case MODE_VECTOR_FLOAT:
19148 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
19149 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
19150 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19151 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19152 || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
19153 && VALID_MMX_REG_MODE (mode)))
19154 units = GET_MODE_SIZE (mode);
19157 /* Return the cost of moving between two registers of mode MODE,
19158 assuming that the move will be in pieces of at most UNITS bytes. */
19159 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
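/* Example (illustrative): a TImode copy through general registers on
   x86-64 moves in two UNITS_PER_WORD pieces and costs
   COSTS_N_INSNS (2), while a V4SFmode copy with SSE enabled moves as a
   single 16-byte unit and costs COSTS_N_INSNS (1).  */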
19162 /* Return cost of vector operation in MODE given that scalar version has
19163 COST. */
19165 static int
19166 ix86_vec_cost (machine_mode mode, int cost)
19168 if (!VECTOR_MODE_P (mode))
19169 return cost;
19171 if (GET_MODE_BITSIZE (mode) == 128
19172 && TARGET_SSE_SPLIT_REGS)
19173 return cost * 2;
19174 if (GET_MODE_BITSIZE (mode) > 128
19175 && TARGET_AVX256_SPLIT_REGS)
19176 return cost * GET_MODE_BITSIZE (mode) / 128;
19177 return cost;
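/* Example (illustrative): on a target with TARGET_AVX256_SPLIT_REGS a
   256-bit operation is internally split into two 128-bit halves, so a
   V8SFmode cost is scaled by 256/128 = 2; likewise
   TARGET_SSE_SPLIT_REGS doubles the cost of 128-bit operations.  */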
19180 /* Return cost of multiplication in MODE. */
19182 static int
19183 ix86_multiplication_cost (const struct processor_costs *cost,
19184 enum machine_mode mode)
19186 machine_mode inner_mode = mode;
19187 if (VECTOR_MODE_P (mode))
19188 inner_mode = GET_MODE_INNER (mode);
19190 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19191 return inner_mode == DFmode ? cost->mulsd : cost->mulss;
19192 else if (X87_FLOAT_MODE_P (mode))
19193 return cost->fmul;
19194 else if (FLOAT_MODE_P (mode))
19195 return ix86_vec_cost (mode,
19196 inner_mode == DFmode ? cost->mulsd : cost->mulss);
19197 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19199 /* vpmullq is used in this case. No emulation is needed. */
19200 if (TARGET_AVX512DQ)
19201 return ix86_vec_cost (mode, cost->mulss);
19203 /* V*QImode is emulated with 7-13 insns. */
19204 if (mode == V16QImode || mode == V32QImode)
19206 int extra = 11;
19207 if (TARGET_XOP && mode == V16QImode)
19208 extra = 5;
19209 else if (TARGET_SSSE3)
19210 extra = 6;
19211 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * extra);
19213 /* V*DImode is emulated with 5-8 insns. */
19214 else if (mode == V2DImode || mode == V4DImode)
19216 if (TARGET_XOP && mode == V2DImode)
19217 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 3);
19218 else
19219 return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
19221 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
19222 insns, including two PMULUDQ. */
19223 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
19224 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
19225 else
19226 return ix86_vec_cost (mode, cost->mulss);
19228 else
19229 return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
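/* Example costs produced above (illustrative, assuming no AVX512DQ):
   V16QImode multiplication without XOP or SSSE3 is priced as
   2 * mulss + 11 * sse_op, with SSSE3 as 2 * mulss + 6 * sse_op, and
   V4SImode without SSE4.1/AVX as 2 * mulss + 5 * sse_op, reflecting
   the PMULUDQ-based emulation.  */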
19232 /* Return cost of division in MODE. */
19234 static int
19235 ix86_division_cost (const struct processor_costs *cost,
19236 enum machine_mode mode)
19238 machine_mode inner_mode = mode;
19239 if (VECTOR_MODE_P (mode))
19240 inner_mode = GET_MODE_INNER (mode);
19242 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19243 return inner_mode == DFmode ? cost->divsd : cost->divss;
19244 else if (X87_FLOAT_MODE_P (mode))
19245 return cost->fdiv;
19246 else if (FLOAT_MODE_P (mode))
19247 return ix86_vec_cost (mode,
19248 inner_mode == DFmode ? cost->divsd : cost->divss);
19249 else
19250 return cost->divide[MODE_INDEX (mode)];
19253 #define COSTS_N_BYTES(N) ((N) * 2)
19255 /* Return cost of shift in MODE.
19256 If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
19257 AND_IN_OP1 specifies whether op1 is the result of an AND, and
19258 SHIFT_AND_TRUNCATE whether op1 is such an AND wrapped in a subreg.
19260 SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */
19262 static int
19263 ix86_shift_rotate_cost (const struct processor_costs *cost,
19264 enum machine_mode mode, bool constant_op1,
19265 HOST_WIDE_INT op1_val,
19266 bool speed,
19267 bool and_in_op1,
19268 bool shift_and_truncate,
19269 bool *skip_op0, bool *skip_op1)
19271 if (skip_op0)
19272 *skip_op0 = *skip_op1 = false;
19273 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19275 /* V*QImode is emulated with 1-11 insns. */
19276 if (mode == V16QImode || mode == V32QImode)
19278 int count = 11;
19279 if (TARGET_XOP && mode == V16QImode)
19281 /* For XOP we use vpshab, which requires a broadcast of the
19282 value to the variable shift insn. For constants this
19283 means a V16Q const in mem; even when we can perform the
19284 shift with one insn set the cost to prefer paddb. */
19285 if (constant_op1)
19287 if (skip_op1)
19288 *skip_op1 = true;
19289 return ix86_vec_cost (mode,
19290 cost->sse_op
19291 + (speed
19292 ? 2
19293 : COSTS_N_BYTES
19294 (GET_MODE_UNIT_SIZE (mode))));
19296 count = 3;
19298 else if (TARGET_SSSE3)
19299 count = 7;
19300 return ix86_vec_cost (mode, cost->sse_op * count);
19302 else
19303 return ix86_vec_cost (mode, cost->sse_op);
19305 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19307 if (constant_op1)
19309 if (op1_val > 32)
19310 return cost->shift_const + COSTS_N_INSNS (2);
19311 else
19312 return cost->shift_const * 2;
19314 else
19316 if (and_in_op1)
19317 return cost->shift_var * 2;
19318 else
19319 return cost->shift_var * 6 + COSTS_N_INSNS (2);
19322 else
19324 if (constant_op1)
19325 return cost->shift_const;
19326 else if (shift_and_truncate)
19328 if (skip_op0)
19329 *skip_op0 = *skip_op1 = true;
19330 /* Return the cost after shift-and truncation. */
19331 return cost->shift_var;
19333 else
19334 return cost->shift_var;
19336 return cost->shift_const;
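/* Example (illustrative): a DImode shift on ia32 (wider than a word) by
   a constant 40 costs shift_const + COSTS_N_INSNS (2), by a constant 5
   costs 2 * shift_const, and by a variable count already masked with an
   AND costs 2 * shift_var.  */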
19339 /* Compute a (partial) cost for rtx X. Return true if the complete
19340 cost has been computed, and false if subexpressions should be
19341 scanned. In either case, *TOTAL contains the cost result. */
19343 static bool
19344 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
19345 int *total, bool speed)
19347 rtx mask;
19348 enum rtx_code code = GET_CODE (x);
19349 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
19350 const struct processor_costs *cost
19351 = speed ? ix86_tune_cost : &ix86_size_cost;
19352 int src_cost;
19354 switch (code)
19356 case SET:
19357 if (register_operand (SET_DEST (x), VOIDmode)
19358 && register_operand (SET_SRC (x), VOIDmode))
19360 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
19361 return true;
19364 if (register_operand (SET_SRC (x), VOIDmode))
19365 /* Avoid potentially incorrect high cost from rtx_costs
19366 for non-tieable SUBREGs. */
19367 src_cost = 0;
19368 else
19370 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
19372 if (CONSTANT_P (SET_SRC (x)))
19373 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
19374 a small value, possibly zero for cheap constants. */
19375 src_cost += COSTS_N_INSNS (1);
19378 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
19379 return true;
19381 case CONST_INT:
19382 case CONST:
19383 case LABEL_REF:
19384 case SYMBOL_REF:
19385 if (x86_64_immediate_operand (x, VOIDmode))
19386 *total = 0;
19387 else
19388 *total = 1;
19389 return true;
19391 case CONST_DOUBLE:
19392 if (IS_STACK_MODE (mode))
19393 switch (standard_80387_constant_p (x))
19395 case -1:
19396 case 0:
19397 break;
19398 case 1: /* 0.0 */
19399 *total = 1;
19400 return true;
19401 default: /* Other constants */
19402 *total = 2;
19403 return true;
19405 /* FALLTHRU */
19407 case CONST_VECTOR:
19408 switch (standard_sse_constant_p (x, mode))
19410 case 0:
19411 break;
19412 case 1: /* 0: xor eliminates false dependency */
19413 *total = 0;
19414 return true;
19415 default: /* -1: cmp contains false dependency */
19416 *total = 1;
19417 return true;
19419 /* FALLTHRU */
19421 case CONST_WIDE_INT:
19422 /* Fall back to (MEM (SYMBOL_REF)), since that's where
19423 it'll probably end up. Add a penalty for size. */
19424 *total = (COSTS_N_INSNS (1)
19425 + (!TARGET_64BIT && flag_pic)
19426 + (GET_MODE_SIZE (mode) <= 4
19427 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
19428 return true;
19430 case ZERO_EXTEND:
19431 /* The zero extension is often completely free on x86_64, so make
19432 it as cheap as possible. */
19433 if (TARGET_64BIT && mode == DImode
19434 && GET_MODE (XEXP (x, 0)) == SImode)
19435 *total = 1;
19436 else if (TARGET_ZERO_EXTEND_WITH_AND)
19437 *total = cost->add;
19438 else
19439 *total = cost->movzx;
19440 return false;
19442 case SIGN_EXTEND:
19443 *total = cost->movsx;
19444 return false;
19446 case ASHIFT:
19447 if (SCALAR_INT_MODE_P (mode)
19448 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
19449 && CONST_INT_P (XEXP (x, 1)))
19451 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19452 if (value == 1)
19454 *total = cost->add;
19455 return false;
19457 if ((value == 2 || value == 3)
19458 && cost->lea <= cost->shift_const)
19460 *total = cost->lea;
19461 return false;
19464 /* FALLTHRU */
19466 case ROTATE:
19467 case ASHIFTRT:
19468 case LSHIFTRT:
19469 case ROTATERT:
19470 bool skip_op0, skip_op1;
19471 *total = ix86_shift_rotate_cost (cost, mode, CONSTANT_P (XEXP (x, 1)),
19472 CONST_INT_P (XEXP (x, 1))
19473 ? INTVAL (XEXP (x, 1)) : -1,
19474 speed,
19475 GET_CODE (XEXP (x, 1)) == AND,
19476 SUBREG_P (XEXP (x, 1))
19477 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND,
19478 &skip_op0, &skip_op1);
19479 if (skip_op0 || skip_op1)
19481 if (!skip_op0)
19482 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
19483 if (!skip_op1)
19484 *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
19485 return true;
19487 return false;
19489 case FMA:
19491 rtx sub;
19493 gcc_assert (FLOAT_MODE_P (mode));
19494 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
19496 *total = ix86_vec_cost (mode,
19497 GET_MODE_INNER (mode) == SFmode
19498 ? cost->fmass : cost->fmasd);
19499 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
19501 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
19502 sub = XEXP (x, 0);
19503 if (GET_CODE (sub) == NEG)
19504 sub = XEXP (sub, 0);
19505 *total += rtx_cost (sub, mode, FMA, 0, speed);
19507 sub = XEXP (x, 2);
19508 if (GET_CODE (sub) == NEG)
19509 sub = XEXP (sub, 0);
19510 *total += rtx_cost (sub, mode, FMA, 2, speed);
19511 return true;
19514 case MULT:
19515 if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
19517 rtx op0 = XEXP (x, 0);
19518 rtx op1 = XEXP (x, 1);
19519 int nbits;
19520 if (CONST_INT_P (XEXP (x, 1)))
19522 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19523 for (nbits = 0; value != 0; value &= value - 1)
19524 nbits++;
19526 else
19527 /* This is arbitrary. */
19528 nbits = 7;
19530 /* Compute costs correctly for widening multiplication. */
19531 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19532 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19533 == GET_MODE_SIZE (mode))
19535 int is_mulwiden = 0;
19536 machine_mode inner_mode = GET_MODE (op0);
19538 if (GET_CODE (op0) == GET_CODE (op1))
19539 is_mulwiden = 1, op1 = XEXP (op1, 0);
19540 else if (CONST_INT_P (op1))
19542 if (GET_CODE (op0) == SIGN_EXTEND)
19543 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19544 == INTVAL (op1);
19545 else
19546 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19549 if (is_mulwiden)
19550 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19553 *total = (cost->mult_init[MODE_INDEX (mode)]
19554 + nbits * cost->mult_bit
19555 + rtx_cost (op0, mode, outer_code, opno, speed)
19556 + rtx_cost (op1, mode, outer_code, opno, speed));
19558 return true;
19560 *total = ix86_multiplication_cost (cost, mode);
19561 return false;
19563 case DIV:
19564 case UDIV:
19565 case MOD:
19566 case UMOD:
19567 *total = ix86_division_cost (cost, mode);
19568 return false;
19570 case PLUS:
19571 if (GET_MODE_CLASS (mode) == MODE_INT
19572 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
19574 if (GET_CODE (XEXP (x, 0)) == PLUS
19575 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19576 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19577 && CONSTANT_P (XEXP (x, 1)))
19579 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19580 if (val == 2 || val == 4 || val == 8)
19582 *total = cost->lea;
19583 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
19584 outer_code, opno, speed);
19585 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
19586 outer_code, opno, speed);
19587 *total += rtx_cost (XEXP (x, 1), mode,
19588 outer_code, opno, speed);
19589 return true;
19592 else if (GET_CODE (XEXP (x, 0)) == MULT
19593 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19595 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19596 if (val == 2 || val == 4 || val == 8)
19598 *total = cost->lea;
19599 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
19600 outer_code, opno, speed);
19601 *total += rtx_cost (XEXP (x, 1), mode,
19602 outer_code, opno, speed);
19603 return true;
19606 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19608 /* Add with carry, ignore the cost of adding a carry flag. */
19609 if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode))
19610 *total = cost->add;
19611 else
19613 *total = cost->lea;
19614 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
19615 outer_code, opno, speed);
19618 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
19619 outer_code, opno, speed);
19620 *total += rtx_cost (XEXP (x, 1), mode,
19621 outer_code, opno, speed);
19622 return true;
19625 /* FALLTHRU */
19627 case MINUS:
19628 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
19629 if (GET_MODE_CLASS (mode) == MODE_INT
19630 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
19631 && GET_CODE (XEXP (x, 0)) == MINUS
19632 && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode))
19634 *total = cost->add;
19635 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
19636 outer_code, opno, speed);
19637 *total += rtx_cost (XEXP (x, 1), mode,
19638 outer_code, opno, speed);
19639 return true;
19642 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19644 *total = cost->addss;
19645 return false;
19647 else if (X87_FLOAT_MODE_P (mode))
19649 *total = cost->fadd;
19650 return false;
19652 else if (FLOAT_MODE_P (mode))
19654 *total = ix86_vec_cost (mode, cost->addss);
19655 return false;
19657 /* FALLTHRU */
19659 case AND:
19660 case IOR:
19661 case XOR:
19662 if (GET_MODE_CLASS (mode) == MODE_INT
19663 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19665 *total = (cost->add * 2
19666 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
19667 << (GET_MODE (XEXP (x, 0)) != DImode))
19668 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
19669 << (GET_MODE (XEXP (x, 1)) != DImode)));
19670 return true;
19672 /* FALLTHRU */
19674 case NEG:
19675 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19677 *total = cost->sse_op;
19678 return false;
19680 else if (X87_FLOAT_MODE_P (mode))
19682 *total = cost->fchs;
19683 return false;
19685 else if (FLOAT_MODE_P (mode))
19687 *total = ix86_vec_cost (mode, cost->sse_op);
19688 return false;
19690 /* FALLTHRU */
19692 case NOT:
19693 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19694 *total = ix86_vec_cost (mode, cost->sse_op);
19695 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19696 *total = cost->add * 2;
19697 else
19698 *total = cost->add;
19699 return false;
19701 case COMPARE:
19702 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19703 && XEXP (XEXP (x, 0), 1) == const1_rtx
19704 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19705 && XEXP (x, 1) == const0_rtx)
19707 /* This kind of construct is implemented using test[bwl].
19708 Treat it as if we had an AND. */
19709 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
19710 *total = (cost->add
19711 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
19712 opno, speed)
19713 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
19714 return true;
19717 if (GET_CODE (XEXP (x, 0)) == PLUS
19718 && rtx_equal_p (XEXP (XEXP (x, 0), 0), XEXP (x, 1)))
19720 /* This is an overflow detection, count it as a normal compare. */
19721 *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
19722 COMPARE, 0, speed);
19723 return true;
19726 /* The embedded comparison operand is completely free. */
19727 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
19728 && XEXP (x, 1) == const0_rtx)
19729 *total = 0;
19731 return false;
19733 case FLOAT_EXTEND:
19734 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
19735 *total = 0;
19736 else
19737 *total = ix86_vec_cost (mode, cost->addss);
19738 return false;
19740 case FLOAT_TRUNCATE:
19741 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
19742 *total = cost->fadd;
19743 else
19744 *total = ix86_vec_cost (mode, cost->addss);
19745 return false;
19747 case ABS:
19748 /* SSE requires memory load for the constant operand. It may make
19749 sense to account for this. Of course the constant operand may or
19750 may not be reused. */
19751 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19752 *total = cost->sse_op;
19753 else if (X87_FLOAT_MODE_P (mode))
19754 *total = cost->fabs;
19755 else if (FLOAT_MODE_P (mode))
19756 *total = ix86_vec_cost (mode, cost->sse_op);
19757 return false;
19759 case SQRT:
19760 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19761 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
19762 else if (X87_FLOAT_MODE_P (mode))
19763 *total = cost->fsqrt;
19764 else if (FLOAT_MODE_P (mode))
19765 *total = ix86_vec_cost (mode,
19766 mode == SFmode ? cost->sqrtss : cost->sqrtsd);
19767 return false;
19769 case UNSPEC:
19770 if (XINT (x, 1) == UNSPEC_TP)
19771 *total = 0;
19772 return false;
19774 case VEC_SELECT:
19775 case VEC_CONCAT:
19776 case VEC_DUPLICATE:
19777 /* ??? Assume all of these vector manipulation patterns are
19778 recognizable. In which case they all pretty much have the
19779 same cost. */
19780 *total = cost->sse_op;
19781 return true;
19782 case VEC_MERGE:
19783 mask = XEXP (x, 2);
19784 /* This is a masked instruction; assume the same cost
19785 as the non-masked variant. */
19786 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
19787 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
19788 else
19789 *total = cost->sse_op;
19790 return true;
19792 default:
19793 return false;
19797 #if TARGET_MACHO
19799 static int current_machopic_label_num;
19801 /* Given a symbol name and its associated stub, write out the
19802 definition of the stub. */
19804 void
19805 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19807 unsigned int length;
19808 char *binder_name, *symbol_name, lazy_ptr_name[32];
19809 int label = ++current_machopic_label_num;
19811 /* For 64-bit we shouldn't get here. */
19812 gcc_assert (!TARGET_64BIT);
19814 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19815 symb = targetm.strip_name_encoding (symb);
19817 length = strlen (stub);
19818 binder_name = XALLOCAVEC (char, length + 32);
19819 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19821 length = strlen (symb);
19822 symbol_name = XALLOCAVEC (char, length + 32);
19823 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19825 sprintf (lazy_ptr_name, "L%d$lz", label);
19827 if (MACHOPIC_ATT_STUB)
19828 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
19829 else if (MACHOPIC_PURE)
19830 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
19831 else
19832 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19834 fprintf (file, "%s:\n", stub);
19835 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19837 if (MACHOPIC_ATT_STUB)
19839 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
19841 else if (MACHOPIC_PURE)
19843 /* PIC stub. */
19844 /* 25-byte PIC stub using "CALL get_pc_thunk". */
19845 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19846 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
19847 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
19848 label, lazy_ptr_name, label);
19849 fprintf (file, "\tjmp\t*%%ecx\n");
19851 else
19852 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19854 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
19855 it needs no stub-binding-helper. */
19856 if (MACHOPIC_ATT_STUB)
19857 return;
19859 fprintf (file, "%s:\n", binder_name);
19861 if (MACHOPIC_PURE)
19863 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
19864 fprintf (file, "\tpushl\t%%ecx\n");
19866 else
19867 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19869 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
19871 /* N.B. Keep the correspondence of these
19872 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
19873 old-pic/new-pic/non-pic stubs; altering this will break
19874 compatibility with existing dylibs. */
19875 if (MACHOPIC_PURE)
19877 /* 25-byte PIC stub using "CALL get_pc_thunk". */
19878 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
19880 else
19881 /* 16-byte -mdynamic-no-pic stub. */
19882 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
19884 fprintf (file, "%s:\n", lazy_ptr_name);
19885 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19886 fprintf (file, ASM_LONG "%s\n", binder_name);
19888 #endif /* TARGET_MACHO */
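/* Purely illustrative sketch of what machopic_output_stub emits for the
   non-PIC (-mdynamic-no-pic) case, assuming a symbol _foo; the exact label
   spellings come from GEN_BINDER_NAME_FOR_STUB / GEN_SYMBOL_NAME_FOR_SYMBOL
   and the running label counter, so they are assumptions here:

       L_foo$stub:
               .indirect_symbol _foo
               jmp     *L1$lz
       L_foo$stub_binder:
               pushl   $L1$lz
               jmp     dyld_stub_binding_helper
       L1$lz:
               .indirect_symbol _foo
               .long   L_foo$stub_binder

   The MACHOPIC_PURE variant instead reaches the lazy pointer through the
   get_pc_thunk call printed above.  */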
19890 /* Order the registers for register allocator. */
19892 void
19893 x86_order_regs_for_local_alloc (void)
19895 int pos = 0;
19896 int i;
19898 /* First allocate the local general purpose registers. */
19899 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19900 if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
19901 reg_alloc_order [pos++] = i;
19903 /* Global general purpose registers. */
19904 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19905 if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
19906 reg_alloc_order [pos++] = i;
19908 /* x87 registers come first in case we are doing FP math
19909 using them. */
19910 if (!TARGET_SSE_MATH)
19911 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19912 reg_alloc_order [pos++] = i;
19914 /* SSE registers. */
19915 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19916 reg_alloc_order [pos++] = i;
19917 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19918 reg_alloc_order [pos++] = i;
19920 /* Extended REX SSE registers. */
19921 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
19922 reg_alloc_order [pos++] = i;
19924 /* Mask registers. */
19925 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
19926 reg_alloc_order [pos++] = i;
19928 /* x87 registers. */
19929 if (TARGET_SSE_MATH)
19930 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19931 reg_alloc_order [pos++] = i;
19933 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19934 reg_alloc_order [pos++] = i;
19936 /* Initialize the rest of the array, as we do not allocate some registers
19937 at all. */
19938 while (pos < FIRST_PSEUDO_REGISTER)
19939 reg_alloc_order [pos++] = 0;
19942 static bool
19943 ix86_ms_bitfield_layout_p (const_tree record_type)
19945 return ((TARGET_MS_BITFIELD_LAYOUT
19946 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19947 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
19950 /* Returns an expression indicating where the this parameter is
19951 located on entry to the FUNCTION. */
19953 static rtx
19954 x86_this_parameter (tree function)
19956 tree type = TREE_TYPE (function);
19957 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
19958 int nregs;
19960 if (TARGET_64BIT)
19962 const int *parm_regs;
19964 if (ix86_function_type_abi (type) == MS_ABI)
19965 parm_regs = x86_64_ms_abi_int_parameter_registers;
19966 else
19967 parm_regs = x86_64_int_parameter_registers;
19968 return gen_rtx_REG (Pmode, parm_regs[aggr]);
19971 nregs = ix86_function_regparm (type, function);
19973 if (nregs > 0 && !stdarg_p (type))
19975 int regno;
19976 unsigned int ccvt = ix86_get_callcvt (type);
19978 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
19979 regno = aggr ? DX_REG : CX_REG;
19980 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
19982 regno = CX_REG;
19983 if (aggr)
19984 return gen_rtx_MEM (SImode,
19985 plus_constant (Pmode, stack_pointer_rtx, 4));
19987 else
19989 regno = AX_REG;
19990 if (aggr)
19992 regno = DX_REG;
19993 if (nregs == 1)
19994 return gen_rtx_MEM (SImode,
19995 plus_constant (Pmode,
19996 stack_pointer_rtx, 4));
19999 return gen_rtx_REG (SImode, regno);
20002 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
20003 aggr ? 8 : 4));
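/* Illustrative summary of the cases handled in x86_this_parameter above
   (a sketch, not used by the compiler): under the 64-bit SysV ABI `this'
   arrives in %rdi, or %rsi when a hidden aggregate-return pointer occupies
   the first slot; under the MS ABI it is %rcx or %rdx respectively.  For
   32-bit fastcall it is %ecx (%edx with a hidden return pointer), for
   thiscall %ecx, and with no register parameters it is the first stack
   argument at 4(%esp), or 8(%esp) past a hidden return pointer.  */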
20006 /* Determine whether x86_output_mi_thunk can succeed. */
20008 static bool
20009 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
20010 const_tree function)
20012 /* 64-bit can handle anything. */
20013 if (TARGET_64BIT)
20014 return true;
20016 /* For 32-bit, everything's fine if we have one free register. */
20017 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
20018 return true;
20020 /* Need a free register for vcall_offset. */
20021 if (vcall_offset)
20022 return false;
20024 /* Need a free register for GOT references. */
20025 if (flag_pic && !targetm.binds_local_p (function))
20026 return false;
20028 /* Otherwise ok. */
20029 return true;
20032 /* Output the assembler code for a thunk function. THUNK_DECL is the
20033 declaration for the thunk function itself, FUNCTION is the decl for
20034 the target function. DELTA is an immediate constant offset to be
20035 added to THIS. If VCALL_OFFSET is nonzero, the word at
20036 *(*this + vcall_offset) should be added to THIS. */
20038 static void
20039 x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
20040 HOST_WIDE_INT vcall_offset, tree function)
20042 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
20043 rtx this_param = x86_this_parameter (function);
20044 rtx this_reg, tmp, fnaddr;
20045 unsigned int tmp_regno;
20046 rtx_insn *insn;
20048 if (TARGET_64BIT)
20049 tmp_regno = R10_REG;
20050 else
20052 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
20053 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
20054 tmp_regno = AX_REG;
20055 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
20056 tmp_regno = DX_REG;
20057 else
20058 tmp_regno = CX_REG;
20061 emit_note (NOTE_INSN_PROLOGUE_END);
20063 /* If CET is enabled, insert an ENDBR instruction. */
20064 if ((flag_cf_protection & CF_BRANCH))
20065 emit_insn (gen_nop_endbr ());
20067 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
20068 pull it in now and let DELTA benefit. */
20069 if (REG_P (this_param))
20070 this_reg = this_param;
20071 else if (vcall_offset)
20073 /* Put the this parameter into %eax. */
20074 this_reg = gen_rtx_REG (Pmode, AX_REG);
20075 emit_move_insn (this_reg, this_param);
20077 else
20078 this_reg = NULL_RTX;
20080 /* Adjust the this parameter by a fixed constant. */
20081 if (delta)
20083 rtx delta_rtx = GEN_INT (delta);
20084 rtx delta_dst = this_reg ? this_reg : this_param;
20086 if (TARGET_64BIT)
20088 if (!x86_64_general_operand (delta_rtx, Pmode))
20090 tmp = gen_rtx_REG (Pmode, tmp_regno);
20091 emit_move_insn (tmp, delta_rtx);
20092 delta_rtx = tmp;
20096 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
20099 /* Adjust the this parameter by a value stored in the vtable. */
20100 if (vcall_offset)
20102 rtx vcall_addr, vcall_mem, this_mem;
20104 tmp = gen_rtx_REG (Pmode, tmp_regno);
20106 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
20107 if (Pmode != ptr_mode)
20108 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
20109 emit_move_insn (tmp, this_mem);
20111 /* Adjust the this parameter. */
20112 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
20113 if (TARGET_64BIT
20114 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
20116 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
20117 emit_move_insn (tmp2, GEN_INT (vcall_offset));
20118 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
20121 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
20122 if (Pmode != ptr_mode)
20123 emit_insn (gen_addsi_1_zext (this_reg,
20124 gen_rtx_REG (ptr_mode,
20125 REGNO (this_reg)),
20126 vcall_mem));
20127 else
20128 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
20131 /* If necessary, drop THIS back to its stack slot. */
20132 if (this_reg && this_reg != this_param)
20133 emit_move_insn (this_param, this_reg);
20135 fnaddr = XEXP (DECL_RTL (function), 0);
20136 if (TARGET_64BIT)
20138 if (!flag_pic || targetm.binds_local_p (function)
20139 || TARGET_PECOFF)
20141 else
20143 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
20144 tmp = gen_rtx_CONST (Pmode, tmp);
20145 fnaddr = gen_const_mem (Pmode, tmp);
20148 else
20150 if (!flag_pic || targetm.binds_local_p (function))
20152 #if TARGET_MACHO
20153 else if (TARGET_MACHO)
20155 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
20156 fnaddr = XEXP (fnaddr, 0);
20158 #endif /* TARGET_MACHO */
20159 else
20161 tmp = gen_rtx_REG (Pmode, CX_REG);
20162 output_set_got (tmp, NULL_RTX);
20164 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
20165 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
20166 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
20167 fnaddr = gen_const_mem (Pmode, fnaddr);
20171 /* Our sibling call patterns do not allow memories, because we have no
20172 predicate that can distinguish between frame and non-frame memory.
20173 For our purposes here, we can get away with (ab)using a jump pattern,
20174 because we're going to do no optimization. */
20175 if (MEM_P (fnaddr))
20177 if (sibcall_insn_operand (fnaddr, word_mode))
20179 fnaddr = XEXP (DECL_RTL (function), 0);
20180 tmp = gen_rtx_MEM (QImode, fnaddr);
20181 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
20182 tmp = emit_call_insn (tmp);
20183 SIBLING_CALL_P (tmp) = 1;
20185 else
20186 emit_jump_insn (gen_indirect_jump (fnaddr));
20188 else
20190 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
20192 // CM_LARGE_PIC always uses a pseudo PIC register which is
20193 // uninitialized. Since FUNCTION is local and calling it
20194 // doesn't go through the PLT, we use scratch register %r11 as
20195 // the PIC register and initialize it here.
20196 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
20197 ix86_init_large_pic_reg (tmp_regno);
20198 fnaddr = legitimize_pic_address (fnaddr,
20199 gen_rtx_REG (Pmode, tmp_regno));
20202 if (!sibcall_insn_operand (fnaddr, word_mode))
20204 tmp = gen_rtx_REG (word_mode, tmp_regno);
20205 if (GET_MODE (fnaddr) != word_mode)
20206 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
20207 emit_move_insn (tmp, fnaddr);
20208 fnaddr = tmp;
20211 tmp = gen_rtx_MEM (QImode, fnaddr);
20212 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
20213 tmp = emit_call_insn (tmp);
20214 SIBLING_CALL_P (tmp) = 1;
20216 emit_barrier ();
20218 /* Emit just enough of rest_of_compilation to get the insns emitted. */
20219 insn = get_insns ();
20220 shorten_branches (insn);
20221 assemble_start_function (thunk_fndecl, fnname);
20222 final_start_function (insn, file, 1);
20223 final (insn, file, 1);
20224 final_end_function ();
20225 assemble_end_function (thunk_fndecl, fnname);
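/* As a concrete illustration only (the exact output depends on the ABI,
   PIC mode and the sibcall patterns; `target' is a made-up symbol), a
   64-bit SysV thunk with DELTA == 8, no VCALL_OFFSET and a locally bound
   target typically reduces to:

       endbr64                 # only with -fcf-protection=branch
       addq    $8, %rdi
       jmp     target

   A nonzero VCALL_OFFSET additionally loads *this into the scratch
   register chosen above and adds the vtable slot at that offset before
   the tail jump.  */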
20228 static void
20229 x86_file_start (void)
20231 default_file_start ();
20232 if (TARGET_16BIT)
20233 fputs ("\t.code16gcc\n", asm_out_file);
20234 #if TARGET_MACHO
20235 darwin_file_start ();
20236 #endif
20237 if (X86_FILE_START_VERSION_DIRECTIVE)
20238 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
20239 if (X86_FILE_START_FLTUSED)
20240 fputs ("\t.global\t__fltused\n", asm_out_file);
20241 if (ix86_asm_dialect == ASM_INTEL)
20242 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
20246 x86_field_alignment (tree type, int computed)
20248 machine_mode mode;
20250 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
20251 return computed;
20252 if (TARGET_IAMCU)
20253 return iamcu_alignment (type, computed);
20254 mode = TYPE_MODE (strip_array_types (type));
20255 if (mode == DFmode || mode == DCmode
20256 || GET_MODE_CLASS (mode) == MODE_INT
20257 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
20258 return MIN (32, computed);
20259 return computed;
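/* For example, with the rule above a 32-bit target without -malign-double
   lays out

       struct s { int i; double d; };

   with `d' at offset 4 (32-bit alignment), while 64-bit targets keep the
   natural 64-bit alignment and place `d' at offset 8.  Illustrative only;
   the struct name is made up.  */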
20262 /* Print call to TARGET to FILE. */
20264 static void
20265 x86_print_call_or_nop (FILE *file, const char *target)
20267 if (flag_nop_mcount || !strcmp (target, "nop"))
20268 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
20269 fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
20270 else
20271 fprintf (file, "1:\tcall\t%s\n", target);
20274 static bool
20275 current_fentry_name (const char **name)
20277 tree attr = lookup_attribute ("fentry_name",
20278 DECL_ATTRIBUTES (current_function_decl));
20279 if (!attr)
20280 return false;
20281 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
20282 return true;
20285 static bool
20286 current_fentry_section (const char **name)
20288 tree attr = lookup_attribute ("fentry_section",
20289 DECL_ATTRIBUTES (current_function_decl));
20290 if (!attr)
20291 return false;
20292 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
20293 return true;
20296 /* Output assembler code to FILE to increment profiler label # LABELNO
20297 for profiling a function entry. */
20298 void
20299 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
20301 if (cfun->machine->insn_queued_at_entrance)
20303 if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
20304 fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
20305 unsigned int patch_area_size
20306 = crtl->patch_area_size - crtl->patch_area_entry;
20307 if (patch_area_size)
20308 ix86_output_patchable_area (patch_area_size,
20309 crtl->patch_area_entry == 0);
20312 const char *mcount_name = MCOUNT_NAME;
20314 if (current_fentry_name (&mcount_name))
20316 else if (fentry_name)
20317 mcount_name = fentry_name;
20318 else if (flag_fentry)
20319 mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
20321 if (TARGET_64BIT)
20323 #ifndef NO_PROFILE_COUNTERS
20324 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
20325 #endif
20327 if (!TARGET_PECOFF && flag_pic)
20328 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
20329 else
20330 x86_print_call_or_nop (file, mcount_name);
20332 else if (flag_pic)
20334 #ifndef NO_PROFILE_COUNTERS
20335 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
20336 LPREFIX, labelno);
20337 #endif
20338 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
20340 else
20342 #ifndef NO_PROFILE_COUNTERS
20343 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
20344 LPREFIX, labelno);
20345 #endif
20346 x86_print_call_or_nop (file, mcount_name);
20349 if (flag_record_mcount
20350 || lookup_attribute ("fentry_section",
20351 DECL_ATTRIBUTES (current_function_decl)))
20353 const char *sname = "__mcount_loc";
20355 if (current_fentry_section (&sname))
20357 else if (fentry_section)
20358 sname = fentry_section;
20360 fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
20361 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
20362 fprintf (file, "\t.previous\n");
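/* Illustrative only (the mcount symbol name and the use of a profile
   counter depend on the target configuration): for 64-bit non-PIC code
   the sequence emitted above is essentially

       1:      call    mcount

   and with -mrecord-mcount, or a "fentry_section" attribute, a pointer
   to the local label is additionally recorded:

       .section __mcount_loc, "a",@progbits
       .quad   1b
       .previous
*/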
20366 /* We don't have exact information about the insn sizes, but we may assume
20367 quite safely that we are informed about all 1 byte insns and memory
20368 address sizes. This is enough to eliminate unnecessary padding in
20369 99% of cases. */
20372 ix86_min_insn_size (rtx_insn *insn)
20374 int l = 0, len;
20376 if (!INSN_P (insn) || !active_insn_p (insn))
20377 return 0;
20379 /* Discard alignments we've emitted, and jump instructions. */
20380 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
20381 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
20382 return 0;
20384 /* Important case - calls are always 5 bytes.
20385 It is common to have many calls in a row. */
20386 if (CALL_P (insn)
20387 && symbolic_reference_mentioned_p (PATTERN (insn))
20388 && !SIBLING_CALL_P (insn))
20389 return 5;
20390 len = get_attr_length (insn);
20391 if (len <= 1)
20392 return 1;
20394 /* For normal instructions we rely on get_attr_length being exact,
20395 with a few exceptions. */
20396 if (!JUMP_P (insn))
20398 enum attr_type type = get_attr_type (insn);
20400 switch (type)
20402 case TYPE_MULTI:
20403 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
20404 || asm_noperands (PATTERN (insn)) >= 0)
20405 return 0;
20406 break;
20407 case TYPE_OTHER:
20408 case TYPE_FCMP:
20409 break;
20410 default:
20411 /* Otherwise trust get_attr_length. */
20412 return len;
20415 l = get_attr_length_address (insn);
20416 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
20417 l = 4;
20419 if (l)
20420 return 1+l;
20421 else
20422 return 2;
20425 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
20427 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
20428 window. */
20430 static void
20431 ix86_avoid_jump_mispredicts (void)
20433 rtx_insn *insn, *start = get_insns ();
20434 int nbytes = 0, njumps = 0;
20435 bool isjump = false;
20437 /* Look for all minimal intervals of instructions containing 4 jumps.
20438 The intervals are bounded by START and INSN. NBYTES is the total
20439 size of the instructions in the interval, including INSN but not
20440 including START. When NBYTES is smaller than 16 bytes, it is possible
20441 that the ends of START and INSN fall into the same 16-byte window.
20443 The smallest offset in the window at which INSN can start is the case
20444 where START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
20445 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
20447 Don't consider an asm goto as a jump; while it can contain a jump, it doesn't
20448 have to, as control transfer to its label(s) can be performed through other
20449 means, and we also estimate the minimum length of all asm stmts as 0. */
20450 for (insn = start; insn; insn = NEXT_INSN (insn))
20452 int min_size;
20454 if (LABEL_P (insn))
20456 align_flags alignment = label_to_alignment (insn);
20457 int align = alignment.levels[0].log;
20458 int max_skip = alignment.levels[0].maxskip;
20460 if (max_skip > 15)
20461 max_skip = 15;
20462 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
20463 already in the current 16 byte page, because otherwise
20464 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
20465 bytes to reach 16 byte boundary. */
20466 if (align <= 0
20467 || (align <= 3 && max_skip != (1 << align) - 1))
20468 max_skip = 0;
20469 if (dump_file)
20470 fprintf (dump_file, "Label %i with max_skip %i\n",
20471 INSN_UID (insn), max_skip);
20472 if (max_skip)
20474 while (nbytes + max_skip >= 16)
20476 start = NEXT_INSN (start);
20477 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
20478 || CALL_P (start))
20479 njumps--, isjump = true;
20480 else
20481 isjump = false;
20482 nbytes -= ix86_min_insn_size (start);
20485 continue;
20488 min_size = ix86_min_insn_size (insn);
20489 nbytes += min_size;
20490 if (dump_file)
20491 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
20492 INSN_UID (insn), min_size);
20493 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
20494 || CALL_P (insn))
20495 njumps++;
20496 else
20497 continue;
20499 while (njumps > 3)
20501 start = NEXT_INSN (start);
20502 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
20503 || CALL_P (start))
20504 njumps--, isjump = true;
20505 else
20506 isjump = false;
20507 nbytes -= ix86_min_insn_size (start);
20509 gcc_assert (njumps >= 0);
20510 if (dump_file)
20511 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
20512 INSN_UID (start), INSN_UID (insn), nbytes);
20514 if (njumps == 3 && isjump && nbytes < 16)
20516 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
20518 if (dump_file)
20519 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
20520 INSN_UID (insn), padsize);
20521 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
20525 #endif
20527 /* AMD Athlon works faster
20528 when RET is not the destination of a conditional jump or directly preceded
20529 by another jump instruction. We avoid the penalty by inserting a NOP just
20530 before the RET instruction in such cases. */
20531 static void
20532 ix86_pad_returns (void)
20534 edge e;
20535 edge_iterator ei;
20537 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20539 basic_block bb = e->src;
20540 rtx_insn *ret = BB_END (bb);
20541 rtx_insn *prev;
20542 bool replace = false;
20544 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
20545 || optimize_bb_for_size_p (bb))
20546 continue;
20547 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
20548 if (active_insn_p (prev) || LABEL_P (prev))
20549 break;
20550 if (prev && LABEL_P (prev))
20552 edge e;
20553 edge_iterator ei;
20555 FOR_EACH_EDGE (e, ei, bb->preds)
20556 if (EDGE_FREQUENCY (e) && e->src->index >= 0
20557 && !(e->flags & EDGE_FALLTHRU))
20559 replace = true;
20560 break;
20563 if (!replace)
20565 prev = prev_active_insn (ret);
20566 if (prev
20567 && ((JUMP_P (prev) && any_condjump_p (prev))
20568 || CALL_P (prev)))
20569 replace = true;
20570 /* Empty functions get a branch mispredict even when
20571 the jump destination is not visible to us. */
20572 if (!prev && !optimize_function_for_size_p (cfun))
20573 replace = true;
20575 if (replace)
20577 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
20578 delete_insn (ret);
20583 /* Count the minimum number of instructions in BB. Return 4 if the
20584 number of instructions >= 4. */
20586 static int
20587 ix86_count_insn_bb (basic_block bb)
20589 rtx_insn *insn;
20590 int insn_count = 0;
20592 /* Count number of instructions in this block. Return 4 if the number
20593 of instructions >= 4. */
20594 FOR_BB_INSNS (bb, insn)
20596 /* This only happens in exit blocks. */
20597 if (JUMP_P (insn)
20598 && ANY_RETURN_P (PATTERN (insn)))
20599 break;
20601 if (NONDEBUG_INSN_P (insn)
20602 && GET_CODE (PATTERN (insn)) != USE
20603 && GET_CODE (PATTERN (insn)) != CLOBBER)
20605 insn_count++;
20606 if (insn_count >= 4)
20607 return insn_count;
20611 return insn_count;
20615 /* Count the minimum number of instructions in code path in BB.
20616 Return 4 if the number of instructions >= 4. */
20618 static int
20619 ix86_count_insn (basic_block bb)
20621 edge e;
20622 edge_iterator ei;
20623 int min_prev_count;
20625 /* Only bother counting instructions along paths with no
20626 more than 2 basic blocks between entry and exit. Given
20627 that BB has an edge to exit, determine if a predecessor
20628 of BB has an edge from entry. If so, compute the number
20629 of instructions in the predecessor block. If there
20630 happen to be multiple such blocks, compute the minimum. */
20631 min_prev_count = 4;
20632 FOR_EACH_EDGE (e, ei, bb->preds)
20634 edge prev_e;
20635 edge_iterator prev_ei;
20637 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
20639 min_prev_count = 0;
20640 break;
20642 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
20644 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
20646 int count = ix86_count_insn_bb (e->src);
20647 if (count < min_prev_count)
20648 min_prev_count = count;
20649 break;
20654 if (min_prev_count < 4)
20655 min_prev_count += ix86_count_insn_bb (bb);
20657 return min_prev_count;
20660 /* Pad short function to 4 instructions. */
20662 static void
20663 ix86_pad_short_function (void)
20665 edge e;
20666 edge_iterator ei;
20668 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20670 rtx_insn *ret = BB_END (e->src);
20671 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
20673 int insn_count = ix86_count_insn (e->src);
20675 /* Pad short function. */
20676 if (insn_count < 4)
20678 rtx_insn *insn = ret;
20680 /* Find epilogue. */
20681 while (insn
20682 && (!NOTE_P (insn)
20683 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
20684 insn = PREV_INSN (insn);
20686 if (!insn)
20687 insn = ret;
20689 /* Two NOPs count as one instruction. */
20690 insn_count = 2 * (4 - insn_count);
20691 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
20697 /* Fix up a Windows system unwinder issue. If an EH region falls through into
20698 the epilogue, the Windows system unwinder will apply epilogue logic and
20699 produce incorrect offsets. This can be avoided by adding a nop between
20700 the last insn that can throw and the first insn of the epilogue. */
20702 static void
20703 ix86_seh_fixup_eh_fallthru (void)
20705 edge e;
20706 edge_iterator ei;
20708 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20710 rtx_insn *insn, *next;
20712 /* Find the beginning of the epilogue. */
20713 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
20714 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
20715 break;
20716 if (insn == NULL)
20717 continue;
20719 /* We only care about preceding insns that can throw. */
20720 insn = prev_active_insn (insn);
20721 if (insn == NULL || !can_throw_internal (insn))
20722 continue;
20724 /* Do not separate calls from their debug information. */
20725 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
20726 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
20727 insn = next;
20728 else
20729 break;
20731 emit_insn_after (gen_nops (const1_rtx), insn);
20735 /* Implement machine specific optimizations. We implement padding of returns
20736 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
20737 static void
20738 ix86_reorg (void)
20740 /* We are freeing block_for_insn in the toplev to keep compatibility
20741 with old MDEP_REORGS that are not CFG based. Recompute it now. */
20742 compute_bb_for_insn ();
20744 if (TARGET_SEH && current_function_has_exception_handlers ())
20745 ix86_seh_fixup_eh_fallthru ();
20747 if (optimize && optimize_function_for_speed_p (cfun))
20749 if (TARGET_PAD_SHORT_FUNCTION)
20750 ix86_pad_short_function ();
20751 else if (TARGET_PAD_RETURNS)
20752 ix86_pad_returns ();
20753 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
20754 if (TARGET_FOUR_JUMP_LIMIT)
20755 ix86_avoid_jump_mispredicts ();
20756 #endif
20760 /* Return nonzero when a QImode register that must be represented via a REX
20761 prefix is used. */
20762 bool
20763 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
20765 int i;
20766 extract_insn_cached (insn);
20767 for (i = 0; i < recog_data.n_operands; i++)
20768 if (GENERAL_REG_P (recog_data.operand[i])
20769 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
20770 return true;
20771 return false;
20774 /* Return true when INSN mentions a register that must be encoded using a
20775 REX prefix. */
20776 bool
20777 x86_extended_reg_mentioned_p (rtx insn)
20779 subrtx_iterator::array_type array;
20780 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
20782 const_rtx x = *iter;
20783 if (REG_P (x)
20784 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
20785 return true;
20787 return false;
20790 /* If profitable, negate (without causing overflow) the integer constant
20791 of mode MODE at location LOC. Return true in this case. */
20792 bool
20793 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
20795 HOST_WIDE_INT val;
20797 if (!CONST_INT_P (*loc))
20798 return false;
20800 switch (mode)
20802 case E_DImode:
20803 /* DImode x86_64 constants must fit in 32 bits. */
20804 gcc_assert (x86_64_immediate_operand (*loc, mode));
20806 mode = SImode;
20807 break;
20809 case E_SImode:
20810 case E_HImode:
20811 case E_QImode:
20812 break;
20814 default:
20815 gcc_unreachable ();
20818 /* Avoid overflows. */
20819 if (mode_signbit_p (mode, *loc))
20820 return false;
20822 val = INTVAL (*loc);
20824 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
20825 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
20826 if ((val < 0 && val != -128)
20827 || val == 128)
20829 *loc = GEN_INT (-val);
20830 return true;
20833 return false;
20836 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20837 optabs would emit if we didn't have TFmode patterns. */
20839 void
20840 x86_emit_floatuns (rtx operands[2])
20842 rtx_code_label *neglab, *donelab;
20843 rtx i0, i1, f0, in, out;
20844 machine_mode mode, inmode;
20846 inmode = GET_MODE (operands[1]);
20847 gcc_assert (inmode == SImode || inmode == DImode);
20849 out = operands[0];
20850 in = force_reg (inmode, operands[1]);
20851 mode = GET_MODE (out);
20852 neglab = gen_label_rtx ();
20853 donelab = gen_label_rtx ();
20854 f0 = gen_reg_rtx (mode);
20856 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
20858 expand_float (out, in, 0);
20860 emit_jump_insn (gen_jump (donelab));
20861 emit_barrier ();
20863 emit_label (neglab);
20865 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
20866 1, OPTAB_DIRECT);
20867 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
20868 1, OPTAB_DIRECT);
20869 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20871 expand_float (f0, i0, 0);
20873 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
20875 emit_label (donelab);
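/* A rough C-level sketch of the expansion above for a DImode input and a
   DFmode result (illustrative only; the real code emits RTL and branches
   with emit_cmp_and_jump_insns):

       if ((long long) u >= 0)
         result = (double) (long long) u;
       else
         {
           long long half = (long long) ((u >> 1) | (u & 1));
           result = (double) half + (double) half;
         }

   The low bit is ORed back into the halved value so that the rounding of
   HALF + HALF matches a direct unsigned conversion.  */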
20878 /* Target hook for scalar_mode_supported_p. */
20879 static bool
20880 ix86_scalar_mode_supported_p (scalar_mode mode)
20882 if (DECIMAL_FLOAT_MODE_P (mode))
20883 return default_decimal_float_supported_p ();
20884 else if (mode == TFmode)
20885 return true;
20886 else
20887 return default_scalar_mode_supported_p (mode);
20890 /* Implements target hook vector_mode_supported_p. */
20891 static bool
20892 ix86_vector_mode_supported_p (machine_mode mode)
20894 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20895 return true;
20896 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20897 return true;
20898 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
20899 return true;
20900 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
20901 return true;
20902 if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
20903 && VALID_MMX_REG_MODE (mode))
20904 return true;
20905 if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
20906 && VALID_MMX_REG_MODE_3DNOW (mode))
20907 return true;
20908 return false;
20911 /* Target hook for c_mode_for_suffix. */
20912 static machine_mode
20913 ix86_c_mode_for_suffix (char suffix)
20915 if (suffix == 'q')
20916 return TFmode;
20917 if (suffix == 'w')
20918 return XFmode;
20920 return VOIDmode;
20923 /* Worker function for TARGET_MD_ASM_ADJUST.
20925 We implement asm flag outputs, and maintain source compatibility
20926 with the old cc0-based compiler. */
20928 static rtx_insn *
20929 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
20930 vec<const char *> &constraints,
20931 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
20933 bool saw_asm_flag = false;
20935 start_sequence ();
20936 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
20938 const char *con = constraints[i];
20939 if (strncmp (con, "=@cc", 4) != 0)
20940 continue;
20941 con += 4;
20942 if (strchr (con, ',') != NULL)
20944 error ("alternatives not allowed in %<asm%> flag output");
20945 continue;
20948 bool invert = false;
20949 if (con[0] == 'n')
20950 invert = true, con++;
20952 machine_mode mode = CCmode;
20953 rtx_code code = UNKNOWN;
20955 switch (con[0])
20957 case 'a':
20958 if (con[1] == 0)
20959 mode = CCAmode, code = EQ;
20960 else if (con[1] == 'e' && con[2] == 0)
20961 mode = CCCmode, code = NE;
20962 break;
20963 case 'b':
20964 if (con[1] == 0)
20965 mode = CCCmode, code = EQ;
20966 else if (con[1] == 'e' && con[2] == 0)
20967 mode = CCAmode, code = NE;
20968 break;
20969 case 'c':
20970 if (con[1] == 0)
20971 mode = CCCmode, code = EQ;
20972 break;
20973 case 'e':
20974 if (con[1] == 0)
20975 mode = CCZmode, code = EQ;
20976 break;
20977 case 'g':
20978 if (con[1] == 0)
20979 mode = CCGCmode, code = GT;
20980 else if (con[1] == 'e' && con[2] == 0)
20981 mode = CCGCmode, code = GE;
20982 break;
20983 case 'l':
20984 if (con[1] == 0)
20985 mode = CCGCmode, code = LT;
20986 else if (con[1] == 'e' && con[2] == 0)
20987 mode = CCGCmode, code = LE;
20988 break;
20989 case 'o':
20990 if (con[1] == 0)
20991 mode = CCOmode, code = EQ;
20992 break;
20993 case 'p':
20994 if (con[1] == 0)
20995 mode = CCPmode, code = EQ;
20996 break;
20997 case 's':
20998 if (con[1] == 0)
20999 mode = CCSmode, code = EQ;
21000 break;
21001 case 'z':
21002 if (con[1] == 0)
21003 mode = CCZmode, code = EQ;
21004 break;
21006 if (code == UNKNOWN)
21008 error ("unknown %<asm%> flag output %qs", constraints[i]);
21009 continue;
21011 if (invert)
21012 code = reverse_condition (code);
21014 rtx dest = outputs[i];
21015 if (!saw_asm_flag)
21017 /* This is the first asm flag output. Here we put the flags
21018 register in as the real output and adjust the condition to
21019 allow it. */
21020 constraints[i] = "=Bf";
21021 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
21022 saw_asm_flag = true;
21024 else
21026 /* We don't need the flags register as output twice. */
21027 constraints[i] = "=X";
21028 outputs[i] = gen_rtx_SCRATCH (SImode);
21031 rtx x = gen_rtx_REG (mode, FLAGS_REG);
21032 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
21034 machine_mode dest_mode = GET_MODE (dest);
21035 if (!SCALAR_INT_MODE_P (dest_mode))
21037 error ("invalid type for %<asm%> flag output");
21038 continue;
21041 if (dest_mode == DImode && !TARGET_64BIT)
21042 dest_mode = SImode;
21044 if (dest_mode != QImode)
21046 rtx destqi = gen_reg_rtx (QImode);
21047 emit_insn (gen_rtx_SET (destqi, x));
21049 if (TARGET_ZERO_EXTEND_WITH_AND
21050 && optimize_function_for_speed_p (cfun))
21052 x = force_reg (dest_mode, const0_rtx);
21054 emit_insn (gen_movstrictqi (gen_lowpart (QImode, x), destqi));
21056 else
21058 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
21059 if (dest_mode == GET_MODE (dest)
21060 && !register_operand (dest, GET_MODE (dest)))
21061 x = force_reg (dest_mode, x);
21065 if (dest_mode != GET_MODE (dest))
21067 rtx tmp = gen_reg_rtx (SImode);
21069 emit_insn (gen_rtx_SET (tmp, x));
21070 emit_insn (gen_zero_extendsidi2 (dest, tmp));
21072 else
21073 emit_insn (gen_rtx_SET (dest, x));
21075 rtx_insn *seq = get_insns ();
21076 end_sequence ();
21078 if (saw_asm_flag)
21079 return seq;
21080 else
21082 /* If we had no asm flag outputs, clobber the flags. */
21083 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
21084 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
21085 return NULL;
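/* A user-level sketch of the flag outputs handled above (illustrative;
   the function name is made up):

       int bit_is_set (unsigned long *addr, unsigned long bit)
       {
         unsigned char cf;
         __asm__ ("btq %2, %1" : "=@ccc" (cf) : "m" (*addr), "r" (bit));
         return cf;
       }

   The "=@ccc" constraint is rewritten here into a read of the carry flag
   (CCCmode, compared EQ against zero), which is then zero-extended into
   CF's mode instead of clobbering the flags as a plain asm would.  */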
21089 /* Implements the targetm.asm.encode_section_info target hook. */
21091 static void ATTRIBUTE_UNUSED
21092 ix86_encode_section_info (tree decl, rtx rtl, int first)
21094 default_encode_section_info (decl, rtl, first);
21096 if (ix86_in_large_data_p (decl))
21097 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
21100 /* Worker function for REVERSE_CONDITION. */
21102 enum rtx_code
21103 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
21105 return (mode == CCFPmode
21106 ? reverse_condition_maybe_unordered (code)
21107 : reverse_condition (code));
21110 /* Output code to perform an x87 FP register move, from OPERANDS[1]
21111 to OPERANDS[0]. */
21113 const char *
21114 output_387_reg_move (rtx_insn *insn, rtx *operands)
21116 if (REG_P (operands[0]))
21118 if (REG_P (operands[1])
21119 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21121 if (REGNO (operands[0]) == FIRST_STACK_REG)
21122 return output_387_ffreep (operands, 0);
21123 return "fstp\t%y0";
21125 if (STACK_TOP_P (operands[0]))
21126 return "fld%Z1\t%y1";
21127 return "fst\t%y0";
21129 else if (MEM_P (operands[0]))
21131 gcc_assert (REG_P (operands[1]));
21132 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21133 return "fstp%Z0\t%y0";
21134 else
21136 /* There is no non-popping store to memory for XFmode.
21137 So if we need one, follow the store with a load. */
21138 if (GET_MODE (operands[0]) == XFmode)
21139 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
21140 else
21141 return "fst%Z0\t%y0";
21144 else
21145 gcc_unreachable();
21147 #ifdef TARGET_SOLARIS
21148 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21150 static void
21151 i386_solaris_elf_named_section (const char *name, unsigned int flags,
21152 tree decl)
21154 /* With Binutils 2.15, the "@unwind" marker must be specified on
21155 every occurrence of the ".eh_frame" section, not just the first
21156 one. */
21157 if (TARGET_64BIT
21158 && strcmp (name, ".eh_frame") == 0)
21160 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
21161 flags & SECTION_WRITE ? "aw" : "a");
21162 return;
21165 #ifndef USE_GAS
21166 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
21168 solaris_elf_asm_comdat_section (name, flags, decl);
21169 return;
21172 /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
21173 SPARC assembler. One cannot mix single-letter flags and #exclude, so
21174 only emit the latter here. */
21175 if (flags & SECTION_EXCLUDE)
21177 fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
21178 return;
21180 #endif
21182 default_elf_asm_named_section (name, flags, decl);
21184 #endif /* TARGET_SOLARIS */
21186 /* Return the mangling of TYPE if it is an extended fundamental type. */
21188 static const char *
21189 ix86_mangle_type (const_tree type)
21191 type = TYPE_MAIN_VARIANT (type);
21193 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
21194 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
21195 return NULL;
21197 switch (TYPE_MODE (type))
21199 case E_TFmode:
21200 /* __float128 is "g". */
21201 return "g";
21202 case E_XFmode:
21203 /* "long double" or __float80 is "e". */
21204 return "e";
21205 default:
21206 return NULL;
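/* For example, with the manglings above a declaration such as
   `void f (__float128, long double)' assembles to `_Z1fge' on x86:
   `g' for the TFmode __float128 and `e' for the XFmode 80-bit long
   double.  Illustrative only; the surrounding `_Z1f' prefix follows the
   generic Itanium C++ ABI rules implemented elsewhere.  */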
21210 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
21212 static tree
21213 ix86_stack_protect_guard (void)
21215 if (TARGET_SSP_TLS_GUARD)
21217 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
21218 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
21219 tree type = build_qualified_type (type_node, qual);
21220 tree t;
21222 if (global_options_set.x_ix86_stack_protector_guard_symbol_str)
21224 t = ix86_tls_stack_chk_guard_decl;
21226 if (t == NULL)
21228 rtx x;
21230 t = build_decl
21231 (UNKNOWN_LOCATION, VAR_DECL,
21232 get_identifier (ix86_stack_protector_guard_symbol_str),
21233 type);
21234 TREE_STATIC (t) = 1;
21235 TREE_PUBLIC (t) = 1;
21236 DECL_EXTERNAL (t) = 1;
21237 TREE_USED (t) = 1;
21238 TREE_THIS_VOLATILE (t) = 1;
21239 DECL_ARTIFICIAL (t) = 1;
21240 DECL_IGNORED_P (t) = 1;
21242 /* Do not share RTL as the declaration is visible outside of
21243 current function. */
21244 x = DECL_RTL (t);
21245 RTX_FLAG (x, used) = 1;
21247 ix86_tls_stack_chk_guard_decl = t;
21250 else
21252 tree asptrtype = build_pointer_type (type);
21254 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
21255 t = build2 (MEM_REF, asptrtype, t,
21256 build_int_cst (asptrtype, 0));
21257 TREE_THIS_VOLATILE (t) = 1;
21260 return t;
21263 return default_stack_protect_guard ();
21266 /* For 32-bit code we can save PIC register setup by using
21267 __stack_chk_fail_local hidden function instead of calling
21268 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
21269 register, so it is better to call __stack_chk_fail directly. */
21271 static tree ATTRIBUTE_UNUSED
21272 ix86_stack_protect_fail (void)
21274 return TARGET_64BIT
21275 ? default_external_stack_protect_fail ()
21276 : default_hidden_stack_protect_fail ();
21279 /* Select a format to encode pointers in exception handling data. CODE
21280 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21281 true if the symbol may be affected by dynamic relocations.
21283 ??? All x86 object file formats are capable of representing this.
21284 After all, the relocation needed is the same as for the call insn.
21285 Whether or not a particular assembler allows us to enter such, I
21286 guess we'll have to see. */
21288 asm_preferred_eh_data_format (int code, int global)
21290 if (flag_pic)
21292 int type = DW_EH_PE_sdata8;
21293 if (!TARGET_64BIT
21294 || ix86_cmodel == CM_SMALL_PIC
21295 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21296 type = DW_EH_PE_sdata4;
21297 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21299 if (ix86_cmodel == CM_SMALL
21300 || (ix86_cmodel == CM_MEDIUM && code))
21301 return DW_EH_PE_udata4;
21302 return DW_EH_PE_absptr;
21305 /* Implement targetm.vectorize.builtin_vectorization_cost. */
21306 static int
21307 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
21308 tree vectype, int)
21310 bool fp = false;
21311 machine_mode mode = TImode;
21312 int index;
21313 if (vectype != NULL)
21315 fp = FLOAT_TYPE_P (vectype);
21316 mode = TYPE_MODE (vectype);
21319 switch (type_of_cost)
21321 case scalar_stmt:
21322 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
21324 case scalar_load:
21325 /* Load/store costs are relative to a register move, which is 2. Recompute
21326 them to COSTS_N_INSNS so everything has the same base. */
21327 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
21328 : ix86_cost->int_load [2]) / 2;
21330 case scalar_store:
21331 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
21332 : ix86_cost->int_store [2]) / 2;
21334 case vector_stmt:
21335 return ix86_vec_cost (mode,
21336 fp ? ix86_cost->addss : ix86_cost->sse_op);
21338 case vector_load:
21339 index = sse_store_index (mode);
21340 /* See PR82713 - we may end up being called on non-vector type. */
21341 if (index < 0)
21342 index = 2;
21343 return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
21345 case vector_store:
21346 index = sse_store_index (mode);
21347 /* See PR82713 - we may end up being called on non-vector type. */
21348 if (index < 0)
21349 index = 2;
21350 return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
21352 case vec_to_scalar:
21353 case scalar_to_vec:
21354 return ix86_vec_cost (mode, ix86_cost->sse_op);
21356 /* We should have separate costs for unaligned loads and gather/scatter.
21357 Do that incrementally. */
21358 case unaligned_load:
21359 index = sse_store_index (mode);
21360 /* See PR82713 - we may end up being called on non-vector type. */
21361 if (index < 0)
21362 index = 2;
21363 return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
21365 case unaligned_store:
21366 index = sse_store_index (mode);
21367 /* See PR82713 - we may end up being called on non-vector type. */
21368 if (index < 0)
21369 index = 2;
21370 return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
21372 case vector_gather_load:
21373 return ix86_vec_cost (mode,
21374 COSTS_N_INSNS
21375 (ix86_cost->gather_static
21376 + ix86_cost->gather_per_elt
21377 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
21379 case vector_scatter_store:
21380 return ix86_vec_cost (mode,
21381 COSTS_N_INSNS
21382 (ix86_cost->scatter_static
21383 + ix86_cost->scatter_per_elt
21384 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
21386 case cond_branch_taken:
21387 return ix86_cost->cond_taken_branch_cost;
21389 case cond_branch_not_taken:
21390 return ix86_cost->cond_not_taken_branch_cost;
21392 case vec_perm:
21393 case vec_promote_demote:
21394 return ix86_vec_cost (mode, ix86_cost->sse_op);
21396 case vec_construct:
21398 /* N element inserts into SSE vectors. */
21399 int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op;
21400 /* One vinserti128 for combining two SSE vectors for AVX256. */
21401 if (GET_MODE_BITSIZE (mode) == 256)
21402 cost += ix86_vec_cost (mode, ix86_cost->addss);
21403 /* One vinserti64x4 and two vinserti128 for combining SSE
21404 and AVX256 vectors to AVX512. */
21405 else if (GET_MODE_BITSIZE (mode) == 512)
21406 cost += 3 * ix86_vec_cost (mode, ix86_cost->addss);
21407 return cost;
21410 default:
21411 gcc_unreachable ();
21416 /* This function returns the calling abi specific va_list type node.
21417 It returns the FNDECL specific va_list type. */
21419 static tree
21420 ix86_fn_abi_va_list (tree fndecl)
21422 if (!TARGET_64BIT)
21423 return va_list_type_node;
21424 gcc_assert (fndecl != NULL_TREE);
21426 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
21427 return ms_va_list_type_node;
21428 else
21429 return sysv_va_list_type_node;
21432 /* Returns the canonical va_list type specified by TYPE. If there
21433 is no valid TYPE provided, it returns NULL_TREE. */
21435 static tree
21436 ix86_canonical_va_list_type (tree type)
21438 if (TARGET_64BIT)
21440 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
21441 return ms_va_list_type_node;
21443 if ((TREE_CODE (type) == ARRAY_TYPE
21444 && integer_zerop (array_type_nelts (type)))
21445 || POINTER_TYPE_P (type))
21447 tree elem_type = TREE_TYPE (type);
21448 if (TREE_CODE (elem_type) == RECORD_TYPE
21449 && lookup_attribute ("sysv_abi va_list",
21450 TYPE_ATTRIBUTES (elem_type)))
21451 return sysv_va_list_type_node;
21454 return NULL_TREE;
21457 return std_canonical_va_list_type (type);
21460 /* Iterate through the target-specific builtin types for va_list.
21461 IDX denotes the iterator, *PTREE is set to the result type of
21462 the va_list builtin, and *PNAME to its internal type.
21463 Returns zero if there is no element for this index, otherwise
21464 IDX should be increased upon the next call.
21465 Note, do not iterate a base builtin's name like __builtin_va_list.
21466 Used from c_common_nodes_and_builtins. */
21468 static int
21469 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
21471 if (TARGET_64BIT)
21473 switch (idx)
21475 default:
21476 break;
21478 case 0:
21479 *ptree = ms_va_list_type_node;
21480 *pname = "__builtin_ms_va_list";
21481 return 1;
21483 case 1:
21484 *ptree = sysv_va_list_type_node;
21485 *pname = "__builtin_sysv_va_list";
21486 return 1;
21490 return 0;
21493 #undef TARGET_SCHED_DISPATCH
21494 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
21495 #undef TARGET_SCHED_DISPATCH_DO
21496 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
21497 #undef TARGET_SCHED_REASSOCIATION_WIDTH
21498 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
21499 #undef TARGET_SCHED_REORDER
21500 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
21501 #undef TARGET_SCHED_ADJUST_PRIORITY
21502 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
21503 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
21504 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
21505 ix86_dependencies_evaluation_hook
21508 /* Implementation of the reassociation_width target hook, used by the
21509 reassoc phase to identify the parallelism level in a reassociated
21510 tree. The statement's tree_code is passed in OP. The operands' mode
21511 is passed in MODE. */
21513 static int
21514 ix86_reassociation_width (unsigned int op, machine_mode mode)
21516 int width = 1;
21517 /* Vector part. */
21518 if (VECTOR_MODE_P (mode))
21520 int div = 1;
21521 if (INTEGRAL_MODE_P (mode))
21522 width = ix86_cost->reassoc_vec_int;
21523 else if (FLOAT_MODE_P (mode))
21524 width = ix86_cost->reassoc_vec_fp;
21526 if (width == 1)
21527 return 1;
21529 /* Integer vector instructions execute in the FP unit and
21530 can execute 3 additions and one multiplication per cycle. */
21531 if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2)
21532 && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
21533 return 1;
21535 /* Account for targets that split wide vectors into multiple parts. */
21536 if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
21537 div = GET_MODE_BITSIZE (mode) / 128;
21538 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
21539 div = GET_MODE_BITSIZE (mode) / 64;
21540 width = (width + div - 1) / div;
21542 /* Scalar part. */
21543 else if (INTEGRAL_MODE_P (mode))
21544 width = ix86_cost->reassoc_int;
21545 else if (FLOAT_MODE_P (mode))
21546 width = ix86_cost->reassoc_fp;
21548 /* Avoid using too many registers in 32bit mode. */
21549 if (!TARGET_64BIT && width > 2)
21550 width = 2;
21551 return width;
21554 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
21555 place emms and femms instructions. */
21557 static machine_mode
21558 ix86_preferred_simd_mode (scalar_mode mode)
21560 if (!TARGET_SSE)
21561 return word_mode;
21563 switch (mode)
21565 case E_QImode:
21566 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
21567 return V64QImode;
21568 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21569 return V32QImode;
21570 else
21571 return V16QImode;
21573 case E_HImode:
21574 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
21575 return V32HImode;
21576 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21577 return V16HImode;
21578 else
21579 return V8HImode;
21581 case E_SImode:
21582 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21583 return V16SImode;
21584 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21585 return V8SImode;
21586 else
21587 return V4SImode;
21589 case E_DImode:
21590 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21591 return V8DImode;
21592 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21593 return V4DImode;
21594 else
21595 return V2DImode;
21597 case E_SFmode:
21598 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21599 return V16SFmode;
21600 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21601 return V8SFmode;
21602 else
21603 return V4SFmode;
21605 case E_DFmode:
21606 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21607 return V8DFmode;
21608 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21609 return V4DFmode;
21610 else if (TARGET_SSE2)
21611 return V2DFmode;
21612 /* FALLTHRU */
21614 default:
21615 return word_mode;
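/* For instance (a sketch of the mapping above): with AVX2 enabled and no
   preference for 128-bit vectors, a char loop is vectorized with
   V32QImode and a double loop with V4DFmode, while plain SSE2 yields
   V16QImode and V2DFmode respectively.  */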
21619 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
21620 vectors. If AVX512F is enabled then try vectorizing with 512bit,
21621 256bit and 128bit vectors. */
21623 static unsigned int
21624 ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
21626 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
21628 modes->safe_push (V64QImode);
21629 modes->safe_push (V32QImode);
21630 modes->safe_push (V16QImode);
21632 else if (TARGET_AVX512F && all)
21634 modes->safe_push (V32QImode);
21635 modes->safe_push (V16QImode);
21636 modes->safe_push (V64QImode);
21638 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
21640 modes->safe_push (V32QImode);
21641 modes->safe_push (V16QImode);
21643 else if (TARGET_AVX && all)
21645 modes->safe_push (V16QImode);
21646 modes->safe_push (V32QImode);
21648 else if (TARGET_MMX_WITH_SSE)
21649 modes->safe_push (V16QImode);
21651 if (TARGET_MMX_WITH_SSE)
21652 modes->safe_push (V8QImode);
21654 return 0;
21657 /* Implementation of targetm.vectorize.get_mask_mode. */
21659 static opt_machine_mode
21660 ix86_get_mask_mode (machine_mode data_mode)
21662 unsigned vector_size = GET_MODE_SIZE (data_mode);
21663 unsigned nunits = GET_MODE_NUNITS (data_mode);
21664 unsigned elem_size = vector_size / nunits;
21666 /* Scalar mask case. */
21667 if ((TARGET_AVX512F && vector_size == 64)
21668 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
21670 if (elem_size == 4
21671 || elem_size == 8
21672 || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
21673 return smallest_int_mode_for_size (nunits);
21676 scalar_int_mode elem_mode
21677 = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
21679 gcc_assert (elem_size * nunits == vector_size);
21681 return mode_for_vector (elem_mode, nunits);
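/* For example (illustrative): with AVX512F a V16SFmode comparison uses
   the scalar HImode mask, one bit per element, while a 128-bit V4SImode
   comparison without AVX512VL falls back to the vector V4SImode mask
   computed above.  */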
21686 /* Return class of registers which could be used for pseudo of MODE
21687 and of class RCLASS for spilling instead of memory. Return NO_REGS
21688 if it is not possible or non-profitable. */
21690 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
21692 static reg_class_t
21693 ix86_spill_class (reg_class_t rclass, machine_mode mode)
21695 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
21696 && TARGET_SSE2
21697 && TARGET_INTER_UNIT_MOVES_TO_VEC
21698 && TARGET_INTER_UNIT_MOVES_FROM_VEC
21699 && (mode == SImode || (TARGET_64BIT && mode == DImode))
21700 && INTEGER_CLASS_P (rclass))
21701 return ALL_SSE_REGS;
21702 return NO_REGS;
21705 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
21706 but returns a lower bound. */
21708 static unsigned int
21709 ix86_max_noce_ifcvt_seq_cost (edge e)
21711 bool predictable_p = predictable_edge_p (e);
21712 if (predictable_p)
21714 if (global_options_set.x_param_max_rtl_if_conversion_predictable_cost)
21715 return param_max_rtl_if_conversion_predictable_cost;
21717 else
21719 if (global_options_set.x_param_max_rtl_if_conversion_unpredictable_cost)
21720 return param_max_rtl_if_conversion_unpredictable_cost;
21723 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
21726 /* Return true if SEQ is a good candidate as a replacement for the
21727 if-convertible sequence described in IF_INFO. */
21729 static bool
21730 ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
21732 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
21734 int cmov_cnt = 0;
21735 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
21736 Maybe we should allow even more conditional moves as long as they
21737 are used far enough not to stall the CPU, or also consider
21738 IF_INFO->TEST_BB succ edge probabilities. */
21739 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
21741 rtx set = single_set (insn);
21742 if (!set)
21743 continue;
21744 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
21745 continue;
21746 rtx src = SET_SRC (set);
21747 machine_mode mode = GET_MODE (src);
21748 if (GET_MODE_CLASS (mode) != MODE_INT
21749 && GET_MODE_CLASS (mode) != MODE_FLOAT)
21750 continue;
21751 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
21752 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
21753 continue;
21754 /* insn is CMOV or FCMOV. */
21755 if (++cmov_cnt > 1)
21756 return false;
21759 return default_noce_conversion_profitable_p (seq, if_info);
21762 /* Implement targetm.vectorize.init_cost. */
21764 static void *
21765 ix86_init_cost (class loop *)
21767 unsigned *cost = XNEWVEC (unsigned, 3);
21768 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
21769 return cost;
21772 /* Implement targetm.vectorize.add_stmt_cost. */
21774 static unsigned
21775 ix86_add_stmt_cost (class vec_info *vinfo, void *data, int count,
21776 enum vect_cost_for_stmt kind,
21777 class _stmt_vec_info *stmt_info, tree vectype,
21778 int misalign,
21779 enum vect_cost_model_location where)
21781 unsigned *cost = (unsigned *) data;
21782 unsigned retval = 0;
21783 bool scalar_p
21784 = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
21785 int stmt_cost = - 1;
21787 bool fp = false;
21788 machine_mode mode = scalar_p ? SImode : TImode;
21790 if (vectype != NULL)
21792 fp = FLOAT_TYPE_P (vectype);
21793 mode = TYPE_MODE (vectype);
21794 if (scalar_p)
21795 mode = TYPE_MODE (TREE_TYPE (vectype));
21798 if ((kind == vector_stmt || kind == scalar_stmt)
21799 && stmt_info
21800 && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
21802 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
21803 /*machine_mode inner_mode = mode;
21804 if (VECTOR_MODE_P (mode))
21805 inner_mode = GET_MODE_INNER (mode);*/
21807 switch (subcode)
21809 case PLUS_EXPR:
21810 case POINTER_PLUS_EXPR:
21811 case MINUS_EXPR:
21812 if (kind == scalar_stmt)
21814 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21815 stmt_cost = ix86_cost->addss;
21816 else if (X87_FLOAT_MODE_P (mode))
21817 stmt_cost = ix86_cost->fadd;
21818 else
21819 stmt_cost = ix86_cost->add;
21821 else
21822 stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
21823 : ix86_cost->sse_op);
21824 break;
21826 case MULT_EXPR:
21827 case WIDEN_MULT_EXPR:
21828 case MULT_HIGHPART_EXPR:
21829 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
21830 break;
21831 case NEGATE_EXPR:
21832 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21833 stmt_cost = ix86_cost->sse_op;
21834 else if (X87_FLOAT_MODE_P (mode))
21835 stmt_cost = ix86_cost->fchs;
21836 else if (VECTOR_MODE_P (mode))
21837 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
21838 else
21839 stmt_cost = ix86_cost->add;
21840 break;
21841 case TRUNC_DIV_EXPR:
21842 case CEIL_DIV_EXPR:
21843 case FLOOR_DIV_EXPR:
21844 case ROUND_DIV_EXPR:
21845 case TRUNC_MOD_EXPR:
21846 case CEIL_MOD_EXPR:
21847 case FLOOR_MOD_EXPR:
21848 case RDIV_EXPR:
21849 case ROUND_MOD_EXPR:
21850 case EXACT_DIV_EXPR:
21851 stmt_cost = ix86_division_cost (ix86_cost, mode);
21852 break;
21854 case RSHIFT_EXPR:
21855 case LSHIFT_EXPR:
21856 case LROTATE_EXPR:
21857 case RROTATE_EXPR:
21859 tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
21860 stmt_cost = ix86_shift_rotate_cost
21861 (ix86_cost, mode,
21862 TREE_CODE (op2) == INTEGER_CST,
21863 cst_and_fits_in_hwi (op2) ? int_cst_value (op2) : -1,
21864 true, false, false, NULL, NULL);
21866 break;
21867 case NOP_EXPR:
21868 /* Only sign-conversions are free. */
21869 if (tree_nop_conversion_p
21870 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
21871 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
21872 stmt_cost = 0;
21873 break;
21875 case BIT_IOR_EXPR:
21876 case ABS_EXPR:
21877 case ABSU_EXPR:
21878 case MIN_EXPR:
21879 case MAX_EXPR:
21880 case BIT_XOR_EXPR:
21881 case BIT_AND_EXPR:
21882 case BIT_NOT_EXPR:
21883 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21884 stmt_cost = ix86_cost->sse_op;
21885 else if (VECTOR_MODE_P (mode))
21886 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
21887 else
21888 stmt_cost = ix86_cost->add;
21889 break;
21890 default:
21891 break;
21895 combined_fn cfn;
21896 if ((kind == vector_stmt || kind == scalar_stmt)
21897 && stmt_info
21898 && stmt_info->stmt
21899 && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
21900 switch (cfn)
21902 case CFN_FMA:
21903 stmt_cost = ix86_vec_cost (mode,
21904 mode == SFmode ? ix86_cost->fmass
21905 : ix86_cost->fmasd);
21906 break;
21907 default:
21908 break;
21911 /* If we do elementwise loads into a vector then we are bound by
21912 latency and execution resources for the many scalar loads
21913 (AGU and load ports). Try to account for this by scaling the
21914 construction cost by the number of elements involved. */
21915 if ((kind == vec_construct || kind == vec_to_scalar)
21916 && stmt_info
21917 && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
21918 || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
21919 && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
21920 && TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info))) != INTEGER_CST)
21922 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
21923 stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
21925 if (stmt_cost == -1)
21926 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
21928 /* Penalize DFmode vector operations for Bonnell. */
21929 if (TARGET_BONNELL && kind == vector_stmt
21930 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
21931 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
21933 /* Statements in an inner loop relative to the loop being
21934 vectorized are weighted more heavily. The value here is
21935 arbitrary and could potentially be improved with analysis. */
21936 if (where == vect_body && stmt_info
21937 && stmt_in_inner_loop_p (vinfo, stmt_info))
21938 count *= 50; /* FIXME. */
21940 retval = (unsigned) (count * stmt_cost);
21942 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
21943 for Silvermont, as it has an out-of-order integer pipeline and can execute
21944 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
21945 if ((TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS
21946 || TARGET_TREMONT || TARGET_INTEL) && stmt_info && stmt_info->stmt)
21948 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
21949 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
21950 retval = (retval * 17) / 10;
21953 cost[where] += retval;
21955 return retval;
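
#if 0
/* Standalone sketch of the final scaling applied above, assuming an
   integer-typed statement in an inner loop on a Silvermont-class target;
   the constants mirror the FIXME values used in the hook and are not a
   model of real latencies.  */
static unsigned
scaled_stmt_cost (unsigned count, unsigned stmt_cost)
{
  count *= 50;			/* inner-loop weighting  */
  unsigned retval = count * stmt_cost;
  return (retval * 17) / 10;	/* in-order SIMD penalty, roughly 1.7x  */
}
#endif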
21958 /* Implement targetm.vectorize.finish_cost. */
21960 static void
21961 ix86_finish_cost (void *data, unsigned *prologue_cost,
21962 unsigned *body_cost, unsigned *epilogue_cost)
21964 unsigned *cost = (unsigned *) data;
21965 *prologue_cost = cost[vect_prologue];
21966 *body_cost = cost[vect_body];
21967 *epilogue_cost = cost[vect_epilogue];
21970 /* Implement targetm.vectorize.destroy_cost_data. */
21972 static void
21973 ix86_destroy_cost_data (void *data)
21975 free (data);
21978 /* Validate target specific memory model bits in VAL. */
21980 static unsigned HOST_WIDE_INT
21981 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
21983 enum memmodel model = memmodel_from_int (val);
21984 bool strong;
21986 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
21987 |MEMMODEL_MASK)
21988 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
21990 warning (OPT_Winvalid_memory_model,
21991 "unknown architecture specific memory model");
21992 return MEMMODEL_SEQ_CST;
21994 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
21995 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
21997 warning (OPT_Winvalid_memory_model,
21998 "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
21999 "memory model");
22000 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
22002 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
22004 warning (OPT_Winvalid_memory_model,
22005 "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
22006 "memory model");
22007 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
22009 return val;
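
#if 0
/* Sketch of the user-level code whose memory-model argument is validated
   above: the HLE hint bits are combined with a standard memory model, so
   VAL may contain IX86_HLE_ACQUIRE or IX86_HLE_RELEASE on top of the
   MEMMODEL_* value (illustrative code; requires -mhle, the __ATOMIC_HLE_*
   macros being the documented user spelling of those bits).  */
static int lockvar;

static void
hle_lock (void)
{
  while (__atomic_exchange_n (&lockvar, 1,
			      __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
    ;
}

static void
hle_unlock (void)
{
  __atomic_store_n (&lockvar, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}
#endif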
22012 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
22013 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
22014 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
22015 or number of vecsize_mangle variants that should be emitted. */
22017 static int
22018 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
22019 struct cgraph_simd_clone *clonei,
22020 tree base_type, int num)
22022 int ret = 1;
22024 if (clonei->simdlen
22025 && (clonei->simdlen < 2
22026 || clonei->simdlen > 1024
22027 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
22029 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22030 "unsupported simdlen %d", clonei->simdlen);
22031 return 0;
22034 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
22035 if (TREE_CODE (ret_type) != VOID_TYPE)
22036 switch (TYPE_MODE (ret_type))
22038 case E_QImode:
22039 case E_HImode:
22040 case E_SImode:
22041 case E_DImode:
22042 case E_SFmode:
22043 case E_DFmode:
22044 /* case E_SCmode: */
22045 /* case E_DCmode: */
22046 if (!AGGREGATE_TYPE_P (ret_type))
22047 break;
22048 /* FALLTHRU */
22049 default:
22050 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22051 "unsupported return type %qT for simd", ret_type);
22052 return 0;
22055 tree t;
22056 int i;
22057 tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
22058 bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
22060 for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
22061 t && t != void_list_node; t = TREE_CHAIN (t), i++)
22063 tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
22064 switch (TYPE_MODE (arg_type))
22066 case E_QImode:
22067 case E_HImode:
22068 case E_SImode:
22069 case E_DImode:
22070 case E_SFmode:
22071 case E_DFmode:
22072 /* case E_SCmode: */
22073 /* case E_DCmode: */
22074 if (!AGGREGATE_TYPE_P (arg_type))
22075 break;
22076 /* FALLTHRU */
22077 default:
22078 if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
22079 break;
22080 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22081 "unsupported argument type %qT for simd", arg_type);
22082 return 0;
22086 if (!TREE_PUBLIC (node->decl))
22088 /* If the function isn't exported, we can pick just one ISA
22089 for the clones. */
22090 if (TARGET_AVX512F)
22091 clonei->vecsize_mangle = 'e';
22092 else if (TARGET_AVX2)
22093 clonei->vecsize_mangle = 'd';
22094 else if (TARGET_AVX)
22095 clonei->vecsize_mangle = 'c';
22096 else
22097 clonei->vecsize_mangle = 'b';
22098 ret = 1;
22100 else
22102 clonei->vecsize_mangle = "bcde"[num];
22103 ret = 4;
22105 clonei->mask_mode = VOIDmode;
22106 switch (clonei->vecsize_mangle)
22108 case 'b':
22109 clonei->vecsize_int = 128;
22110 clonei->vecsize_float = 128;
22111 break;
22112 case 'c':
22113 clonei->vecsize_int = 128;
22114 clonei->vecsize_float = 256;
22115 break;
22116 case 'd':
22117 clonei->vecsize_int = 256;
22118 clonei->vecsize_float = 256;
22119 break;
22120 case 'e':
22121 clonei->vecsize_int = 512;
22122 clonei->vecsize_float = 512;
22123 if (TYPE_MODE (base_type) == QImode)
22124 clonei->mask_mode = DImode;
22125 else
22126 clonei->mask_mode = SImode;
22127 break;
22129 if (clonei->simdlen == 0)
22131 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
22132 clonei->simdlen = clonei->vecsize_int;
22133 else
22134 clonei->simdlen = clonei->vecsize_float;
22135 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
22137 else if (clonei->simdlen > 16)
22139 /* For compatibility with ICC, use the same upper bounds
22140 for simdlen. In particular, for CTYPE below, use the return type,
22141 unless the function returns void, in which case use the characteristic
22142 type. If it is possible for the given SIMDLEN to pass a CTYPE value
22143 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
22144 for 64-bit code), accept that SIMDLEN; otherwise warn and don't
22145 emit the corresponding clone. */
22146 tree ctype = ret_type;
22147 if (TREE_CODE (ret_type) == VOID_TYPE)
22148 ctype = base_type;
22149 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
22150 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
22151 cnt /= clonei->vecsize_int;
22152 else
22153 cnt /= clonei->vecsize_float;
22154 if (cnt > (TARGET_64BIT ? 16 : 8))
22156 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22157 "unsupported simdlen %d", clonei->simdlen);
22158 return 0;
22161 return ret;
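
#if 0
/* Sketch of the user-level declaration this hook services.  For an exported
   function like the one below, four clones are emitted ("bcde"), mangled
   'b' for the 128-bit/SSE variant, 'c' for AVX, 'd' for AVX2 and 'e' for
   AVX-512, with simdlen derived from the characteristic type as computed
   above.  */
#pragma omp declare simd
double
scale (double x, double factor)
{
  return x * factor;
}
#endif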
22164 /* If SIMD clone NODE can't be used in a vectorized loop
22165 in the current function, return -1; otherwise return a badness of using it
22166 (0 if it is most desirable from the vecsize_mangle point of view, 1
22167 slightly less desirable, etc.). */
22169 static int
22170 ix86_simd_clone_usable (struct cgraph_node *node)
22172 switch (node->simdclone->vecsize_mangle)
22174 case 'b':
22175 if (!TARGET_SSE2)
22176 return -1;
22177 if (!TARGET_AVX)
22178 return 0;
22179 return TARGET_AVX2 ? 2 : 1;
22180 case 'c':
22181 if (!TARGET_AVX)
22182 return -1;
22183 return TARGET_AVX2 ? 1 : 0;
22184 case 'd':
22185 if (!TARGET_AVX2)
22186 return -1;
22187 return 0;
22188 case 'e':
22189 if (!TARGET_AVX512F)
22190 return -1;
22191 return 0;
22192 default:
22193 gcc_unreachable ();
22197 /* This function adjusts the unroll factor based on
22198 the hardware capabilities. For example, bdver3 has
22199 a loop buffer which makes unrolling of smaller
22200 loops less important. This function decides the
22201 unroll factor using the number of memory references
22202 (the value 32 is used) as a heuristic. */
22204 static unsigned
22205 ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
22207 basic_block *bbs;
22208 rtx_insn *insn;
22209 unsigned i;
22210 unsigned mem_count = 0;
22212 if (!TARGET_ADJUST_UNROLL)
22213 return nunroll;
22215 /* Count the number of memory references within the loop body.
22216 This value determines the unrolling factor for bdver3 and bdver4
22217 architectures. */
22218 subrtx_iterator::array_type array;
22219 bbs = get_loop_body (loop);
22220 for (i = 0; i < loop->num_nodes; i++)
22221 FOR_BB_INSNS (bbs[i], insn)
22222 if (NONDEBUG_INSN_P (insn))
22223 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
22224 if (const_rtx x = *iter)
22225 if (MEM_P (x))
22227 machine_mode mode = GET_MODE (x);
22228 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
22229 if (n_words > 4)
22230 mem_count += 2;
22231 else
22232 mem_count += 1;
22234 free (bbs);
22236 if (mem_count && mem_count <= 32)
22237 return MIN (nunroll, 32 / mem_count);
22239 return nunroll;
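
#if 0
/* Standalone restatement of the capping rule above; MEM_COUNT is the number
   of memory references found in the loop body (numbers illustrative only):
   6 references cap the unroll factor at 32 / 6 == 5, while more than 32
   references leave the generic NUNROLL unchanged.  */
static unsigned
capped_unroll (unsigned nunroll, unsigned mem_count)
{
  if (mem_count && mem_count <= 32)
    return nunroll < 32 / mem_count ? nunroll : 32 / mem_count;
  return nunroll;
}
#endif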
22243 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
22245 static bool
22246 ix86_float_exceptions_rounding_supported_p (void)
22248 /* For x87 floating point with standard excess precision handling,
22249 there is no adddf3 pattern (since x87 floating point only has
22250 XFmode operations) so the default hook implementation gets this
22251 wrong. */
22252 return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
22255 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
22257 static void
22258 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
22260 if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
22261 return;
22262 tree exceptions_var = create_tmp_var_raw (integer_type_node);
22263 if (TARGET_80387)
22265 tree fenv_index_type = build_index_type (size_int (6));
22266 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
22267 tree fenv_var = create_tmp_var_raw (fenv_type);
22268 TREE_ADDRESSABLE (fenv_var) = 1;
22269 tree fenv_ptr = build_pointer_type (fenv_type);
22270 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
22271 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
22272 tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
22273 tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
22274 tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
22275 tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
22276 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
22277 tree hold_fnclex = build_call_expr (fnclex, 0);
22278 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
22279 NULL_TREE, NULL_TREE);
22280 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
22281 hold_fnclex);
22282 *clear = build_call_expr (fnclex, 0);
22283 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
22284 tree fnstsw_call = build_call_expr (fnstsw, 0);
22285 tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
22286 fnstsw_call, NULL_TREE, NULL_TREE);
22287 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
22288 tree update_mod = build4 (TARGET_EXPR, integer_type_node,
22289 exceptions_var, exceptions_x87,
22290 NULL_TREE, NULL_TREE);
22291 *update = build2 (COMPOUND_EXPR, integer_type_node,
22292 sw_mod, update_mod);
22293 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
22294 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
22296 if (TARGET_SSE && TARGET_SSE_MATH)
22298 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
22299 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
22300 tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
22301 tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
22302 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
22303 tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
22304 mxcsr_orig_var, stmxcsr_hold_call,
22305 NULL_TREE, NULL_TREE);
22306 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
22307 mxcsr_orig_var,
22308 build_int_cst (unsigned_type_node, 0x1f80));
22309 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
22310 build_int_cst (unsigned_type_node, 0xffffffc0));
22311 tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
22312 mxcsr_mod_var, hold_mod_val,
22313 NULL_TREE, NULL_TREE);
22314 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
22315 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
22316 hold_assign_orig, hold_assign_mod);
22317 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
22318 ldmxcsr_hold_call);
22319 if (*hold)
22320 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
22321 else
22322 *hold = hold_all;
22323 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
22324 if (*clear)
22325 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
22326 ldmxcsr_clear_call);
22327 else
22328 *clear = ldmxcsr_clear_call;
22329 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
22330 tree exceptions_sse = fold_convert (integer_type_node,
22331 stxmcsr_update_call);
22332 if (*update)
22334 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
22335 exceptions_var, exceptions_sse);
22336 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
22337 exceptions_var, exceptions_mod);
22338 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
22339 exceptions_assign);
22341 else
22342 *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
22343 exceptions_sse, NULL_TREE, NULL_TREE);
22344 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
22345 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
22346 ldmxcsr_update_call);
22348 tree atomic_feraiseexcept
22349 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
22350 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
22351 1, exceptions_var);
22352 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
22353 atomic_feraiseexcept_call);
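
#if 0
/* Sketch of the user-level construct the hold/clear/update trees above are
   built for: a compound assignment to an _Atomic floating-point object is
   expanded as a compare-exchange loop, so the exception flags are saved and
   cleared around the speculative iterations and raised exactly once when
   the update succeeds (illustrative code only).  */
#include <stdatomic.h>

static _Atomic double total;

static void
add_sample (double x)
{
  total += x;
}
#endif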
22356 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
22357 /* For i386, a common symbol is local only for non-PIE binaries. For
22358 x86-64, a common symbol is local only for non-PIE binaries, or when the
22359 linker supports copy relocations in PIE binaries. */
22361 static bool
22362 ix86_binds_local_p (const_tree exp)
22364 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
22365 (!flag_pic
22366 || (TARGET_64BIT
22367 && HAVE_LD_PIE_COPYRELOC != 0)));
22369 #endif
22371 /* If MEM is in the form of [base+offset], extract the two parts
22372 of address and set to BASE and OFFSET, otherwise return false. */
22374 static bool
22375 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
22377 rtx addr;
22379 gcc_assert (MEM_P (mem));
22381 addr = XEXP (mem, 0);
22383 if (GET_CODE (addr) == CONST)
22384 addr = XEXP (addr, 0);
22386 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
22388 *base = addr;
22389 *offset = const0_rtx;
22390 return true;
22393 if (GET_CODE (addr) == PLUS
22394 && (REG_P (XEXP (addr, 0))
22395 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
22396 && CONST_INT_P (XEXP (addr, 1)))
22398 *base = XEXP (addr, 0);
22399 *offset = XEXP (addr, 1);
22400 return true;
22403 return false;
22406 /* Given OPERANDS of consecutive load/store, check if we can merge
22407 them into move multiple. LOAD is true if they are load instructions.
22408 MODE is the mode of memory operands. */
22410 bool
22411 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
22412 machine_mode mode)
22414 HOST_WIDE_INT offval_1, offval_2, msize;
22415 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
22417 if (load)
22419 mem_1 = operands[1];
22420 mem_2 = operands[3];
22421 reg_1 = operands[0];
22422 reg_2 = operands[2];
22424 else
22426 mem_1 = operands[0];
22427 mem_2 = operands[2];
22428 reg_1 = operands[1];
22429 reg_2 = operands[3];
22432 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
22434 if (REGNO (reg_1) != REGNO (reg_2))
22435 return false;
22437 /* Check if the addresses are in the form of [base+offset]. */
22438 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
22439 return false;
22440 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
22441 return false;
22443 /* Check if the bases are the same. */
22444 if (!rtx_equal_p (base_1, base_2))
22445 return false;
22447 offval_1 = INTVAL (offset_1);
22448 offval_2 = INTVAL (offset_2);
22449 msize = GET_MODE_SIZE (mode);
22450 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
22451 if (offval_1 + msize != offval_2)
22452 return false;
22454 return true;
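
#if 0
/* Standalone restatement of the adjacency test above with plain offsets
   (illustrative): for SImode (size 4), offsets 0 and 4 merge, offsets 0 and
   8 do not, and the first access must be the one at the lower address.  */
static bool
adjacent_p (long off1, long off2, long msize)
{
  return off1 + msize == off2;
}
#endif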
22457 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
22459 static bool
22460 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
22461 optimization_type opt_type)
22463 switch (op)
22465 case asin_optab:
22466 case acos_optab:
22467 case log1p_optab:
22468 case exp_optab:
22469 case exp10_optab:
22470 case exp2_optab:
22471 case expm1_optab:
22472 case ldexp_optab:
22473 case scalb_optab:
22474 case round_optab:
22475 return opt_type == OPTIMIZE_FOR_SPEED;
22477 case rint_optab:
22478 if (SSE_FLOAT_MODE_P (mode1)
22479 && TARGET_SSE_MATH
22480 && !flag_trapping_math
22481 && !TARGET_SSE4_1)
22482 return opt_type == OPTIMIZE_FOR_SPEED;
22483 return true;
22485 case floor_optab:
22486 case ceil_optab:
22487 case btrunc_optab:
22488 if (SSE_FLOAT_MODE_P (mode1)
22489 && TARGET_SSE_MATH
22490 && !flag_trapping_math
22491 && TARGET_SSE4_1)
22492 return true;
22493 return opt_type == OPTIMIZE_FOR_SPEED;
22495 case rsqrt_optab:
22496 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
22498 default:
22499 return true;
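
#if 0
/* Sketch of the trade-off encoded above: without SSE4.1 the floor/ceil/trunc
   expanders emit a multi-instruction SSE sequence, so they are used only
   when optimizing for speed; with SSE4.1 they map to a single
   roundss/roundsd and are considered supported regardless of size
   (illustrative user code, assuming -msse4.1 and -fno-trapping-math).  */
#include <math.h>

double
round_down (double x)
{
  return floor (x);		/* expanded inline to roundsd  */
}
#endif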
22503 /* Address space support.
22505 This is not "far pointers" in the 16-bit sense, but an easy way
22506 to use %fs and %gs segment prefixes. Therefore:
22508 (a) All address spaces have the same modes,
22509 (b) All address spaces have the same address forms,
22510 (c) While %fs and %gs are technically subsets of the generic
22511 address space, they are probably not subsets of each other.
22512 (d) Since we have no access to the segment base register values
22513 without resorting to a system call, we cannot convert a
22514 non-default address space to a default address space.
22515 Therefore we do not claim %fs or %gs are subsets of generic.
22517 Therefore we can (mostly) use the default hooks. */
22519 /* All use of segmentation is assumed to make address 0 valid. */
22521 static bool
22522 ix86_addr_space_zero_address_valid (addr_space_t as)
22524 return as != ADDR_SPACE_GENERIC;
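
#if 0
/* Sketch of the named address spaces this covers (illustrative user code,
   using GCC's documented __seg_gs qualifier): a %gs-relative access at
   offset 0 is perfectly legitimate, which is why address 0 is treated as
   valid for the non-generic spaces above.  */
typedef struct percpu { int counter; } percpu_t;

static int
read_counter (const __seg_gs percpu_t *p)
{
  return p->counter;		/* emits a %gs-prefixed load  */
}
#endif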
22527 static void
22528 ix86_init_libfuncs (void)
22530 if (TARGET_64BIT)
22532 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
22533 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
22535 else
22537 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
22538 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
22541 #if TARGET_MACHO
22542 darwin_rename_builtins ();
22543 #endif
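
#if 0
/* Sketch of what the divmod libfuncs above enable: when a division and a
   modulo by the same operands appear together, the middle end can combine
   them into a single __divmoddi4 (or __divmodti4) call instead of separate
   division and modulo libcalls (illustrative user code, 32-bit target).  */
static long long
split_digit (long long v, long long *rem)
{
  *rem = v % 10;
  return v / 10;
}
#endif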
22546 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
22547 FPU, assume that the fpcw is set to extended precision; when using
22548 only SSE, rounding is correct; when using both SSE and the FPU,
22549 the rounding precision is indeterminate, since either may be chosen
22550 apparently at random. */
22552 static enum flt_eval_method
22553 ix86_excess_precision (enum excess_precision_type type)
22555 switch (type)
22557 case EXCESS_PRECISION_TYPE_FAST:
22558 /* The fastest type to promote to will always be the native type,
22559 whether that occurs with implicit excess precision or
22560 otherwise. */
22561 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
22562 case EXCESS_PRECISION_TYPE_STANDARD:
22563 case EXCESS_PRECISION_TYPE_IMPLICIT:
22564 /* Otherwise, the excess precision we want when we are
22565 in a standards compliant mode, and the implicit precision we
22566 provide would be identical were it not for the unpredictable
22567 cases. */
22568 if (!TARGET_80387)
22569 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
22570 else if (!TARGET_MIX_SSE_I387)
22572 if (!(TARGET_SSE && TARGET_SSE_MATH))
22573 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
22574 else if (TARGET_SSE2)
22575 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
22578 /* If we are in standards compliant mode, but we know we will
22579 calculate in unpredictable precision, return
22580 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
22581 excess precision if the target can't guarantee it will honor
22582 it. */
22583 return (type == EXCESS_PRECISION_TYPE_STANDARD
22584 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
22585 : FLT_EVAL_METHOD_UNPREDICTABLE);
22586 default:
22587 gcc_unreachable ();
22590 return FLT_EVAL_METHOD_UNPREDICTABLE;
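
#if 0
/* Sketch of what the returned method means for user code: with x87 math the
   intermediate product below may be kept in extended precision, so the
   comparison can give a different answer than with -mfpmath=sse, where
   float arithmetic is evaluated in float (illustrative only).  */
static int
products_equal (float a, float b, float c)
{
  return a * b == c;
}
#endif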
22593 /* Implement PUSH_ROUNDING. On 386, we have a pushw instruction that
22594 decrements by exactly 2 no matter what the position was; there is no pushb.
22596 But as the CIE data alignment factor on this arch is -4 for 32-bit targets
22597 and -8 for 64-bit targets, we need to make sure all stack pointer adjustments
22598 are multiples of 4 for 32-bit targets and 8 for 64-bit targets. */
22600 poly_int64
22601 ix86_push_rounding (poly_int64 bytes)
22603 return ROUND_UP (bytes, UNITS_PER_WORD);
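
#if 0
/* Worked example of the rounding above (64-bit target, UNITS_PER_WORD == 8):
   ROUND_UP (2, 8) == 8 and ROUND_UP (12, 8) == 16, so pushing a 2-byte or a
   12-byte operand keeps every stack pointer adjustment a multiple of 8,
   matching the CIE data alignment factor of -8.  */
#endif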
22606 /* Target-specific selftests. */
22608 #if CHECKING_P
22610 namespace selftest {
22612 /* Verify that hard regs are dumped as expected (in compact mode). */
22614 static void
22615 ix86_test_dumping_hard_regs ()
22617 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
22618 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
22621 /* Test dumping an insn with repeated references to the same SCRATCH,
22622 to verify the rtx_reuse code. */
22624 static void
22625 ix86_test_dumping_memory_blockage ()
22627 set_new_first_and_last_insn (NULL, NULL);
22629 rtx pat = gen_memory_blockage ();
22630 rtx_reuse_manager r;
22631 r.preprocess (pat);
22633 /* Verify that the repeated references to the SCRATCH use
22634 reuse IDs. The first should be prefixed with a reuse ID,
22635 and the second should be dumped as a "reuse_rtx" of that ID.
22636 The expected string assumes Pmode == DImode. */
22637 if (Pmode == DImode)
22638 ASSERT_RTL_DUMP_EQ_WITH_REUSE
22639 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
22640 " (unspec:BLK [\n"
22641 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
22642 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
22645 /* Verify loading an RTL dump; specifically a dump of copying
22646 a param on x86_64 from a hard reg into the frame.
22647 This test is target-specific since the dump contains target-specific
22648 hard reg names. */
22650 static void
22651 ix86_test_loading_dump_fragment_1 ()
22653 rtl_dump_test t (SELFTEST_LOCATION,
22654 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
22656 rtx_insn *insn = get_insn_by_uid (1);
22658 /* The block structure and indentation here is purely for
22659 readability; it mirrors the structure of the rtx. */
22660 tree mem_expr;
22662 rtx pat = PATTERN (insn);
22663 ASSERT_EQ (SET, GET_CODE (pat));
22665 rtx dest = SET_DEST (pat);
22666 ASSERT_EQ (MEM, GET_CODE (dest));
22667 /* Verify the "/c" was parsed. */
22668 ASSERT_TRUE (RTX_FLAG (dest, call));
22669 ASSERT_EQ (SImode, GET_MODE (dest));
22671 rtx addr = XEXP (dest, 0);
22672 ASSERT_EQ (PLUS, GET_CODE (addr));
22673 ASSERT_EQ (DImode, GET_MODE (addr));
22675 rtx lhs = XEXP (addr, 0);
22676 /* Verify that the "frame" REG was consolidated. */
22677 ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
22680 rtx rhs = XEXP (addr, 1);
22681 ASSERT_EQ (CONST_INT, GET_CODE (rhs));
22682 ASSERT_EQ (-4, INTVAL (rhs));
22685 /* Verify the "[1 i+0 S4 A32]" was parsed. */
22686 ASSERT_EQ (1, MEM_ALIAS_SET (dest));
22687 /* "i" should have been handled by synthesizing a global int
22688 variable named "i". */
22689 mem_expr = MEM_EXPR (dest);
22690 ASSERT_NE (mem_expr, NULL);
22691 ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
22692 ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
22693 ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
22694 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
22695 /* "+0". */
22696 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
22697 ASSERT_EQ (0, MEM_OFFSET (dest));
22698 /* "S4". */
22699 ASSERT_EQ (4, MEM_SIZE (dest));
22700 /* "A32. */
22701 ASSERT_EQ (32, MEM_ALIGN (dest));
22704 rtx src = SET_SRC (pat);
22705 ASSERT_EQ (REG, GET_CODE (src));
22706 ASSERT_EQ (SImode, GET_MODE (src));
22707 ASSERT_EQ (5, REGNO (src));
22708 tree reg_expr = REG_EXPR (src);
22709 /* "i" here should point to the same var as for the MEM_EXPR. */
22710 ASSERT_EQ (reg_expr, mem_expr);
22715 /* Verify that the RTL loader copes with a call_insn dump.
22716 This test is target-specific since the dump contains a target-specific
22717 hard reg name. */
22719 static void
22720 ix86_test_loading_call_insn ()
22722 /* The test dump includes register "xmm0", which requires TARGET_SSE
22723 to exist. */
22724 if (!TARGET_SSE)
22725 return;
22727 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
22729 rtx_insn *insn = get_insns ();
22730 ASSERT_EQ (CALL_INSN, GET_CODE (insn));
22732 /* "/j". */
22733 ASSERT_TRUE (RTX_FLAG (insn, jump));
22735 rtx pat = PATTERN (insn);
22736 ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
22738 /* Verify REG_NOTES. */
22740 /* "(expr_list:REG_CALL_DECL". */
22741 ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
22742 rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
22743 ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
22745 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
22746 rtx_expr_list *note1 = note0->next ();
22747 ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
22749 ASSERT_EQ (NULL, note1->next ());
22752 /* Verify CALL_INSN_FUNCTION_USAGE. */
22754 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
22755 rtx_expr_list *usage
22756 = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
22757 ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
22758 ASSERT_EQ (DFmode, GET_MODE (usage));
22759 ASSERT_EQ (USE, GET_CODE (usage->element ()));
22760 ASSERT_EQ (NULL, usage->next ());
22764 /* Verify that the RTL loader copes with a dump from print_rtx_function.
22765 This test is target-specific since the dump contains target-specific
22766 hard reg names. */
22768 static void
22769 ix86_test_loading_full_dump ()
22771 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
22773 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
22775 rtx_insn *insn_1 = get_insn_by_uid (1);
22776 ASSERT_EQ (NOTE, GET_CODE (insn_1));
22778 rtx_insn *insn_7 = get_insn_by_uid (7);
22779 ASSERT_EQ (INSN, GET_CODE (insn_7));
22780 ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
22782 rtx_insn *insn_15 = get_insn_by_uid (15);
22783 ASSERT_EQ (INSN, GET_CODE (insn_15));
22784 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
22786 /* Verify crtl->return_rtx. */
22787 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
22788 ASSERT_EQ (0, REGNO (crtl->return_rtx));
22789 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
22792 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
22793 In particular, verify that it correctly loads the 2nd operand.
22794 This test is target-specific since these are machine-specific
22795 operands (and enums). */
22797 static void
22798 ix86_test_loading_unspec ()
22800 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
22802 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
22804 ASSERT_TRUE (cfun);
22806 /* Test of an UNSPEC. */
22807 rtx_insn *insn = get_insns ();
22808 ASSERT_EQ (INSN, GET_CODE (insn));
22809 rtx set = single_set (insn);
22810 ASSERT_NE (NULL, set);
22811 rtx dst = SET_DEST (set);
22812 ASSERT_EQ (MEM, GET_CODE (dst));
22813 rtx src = SET_SRC (set);
22814 ASSERT_EQ (UNSPEC, GET_CODE (src));
22815 ASSERT_EQ (BLKmode, GET_MODE (src));
22816 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
22818 rtx v0 = XVECEXP (src, 0, 0);
22820 /* Verify that the two uses of the first SCRATCH have pointer
22821 equality. */
22822 rtx scratch_a = XEXP (dst, 0);
22823 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
22825 rtx scratch_b = XEXP (v0, 0);
22826 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
22828 ASSERT_EQ (scratch_a, scratch_b);
22830 /* Verify that the two mems are thus treated as equal. */
22831 ASSERT_TRUE (rtx_equal_p (dst, v0));
22833 /* Verify that the insn is recognized. */
22834 ASSERT_NE (-1, recog_memoized (insn));
22836 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
22837 insn = NEXT_INSN (insn);
22838 ASSERT_EQ (INSN, GET_CODE (insn));
22840 set = single_set (insn);
22841 ASSERT_NE (NULL, set);
22843 src = SET_SRC (set);
22844 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
22845 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
22848 /* Run all target-specific selftests. */
22850 static void
22851 ix86_run_selftests (void)
22853 ix86_test_dumping_hard_regs ();
22854 ix86_test_dumping_memory_blockage ();
22856 /* Various tests of loading RTL dumps, here because they contain
22857 ix86-isms (e.g. names of hard regs). */
22858 ix86_test_loading_dump_fragment_1 ();
22859 ix86_test_loading_call_insn ();
22860 ix86_test_loading_full_dump ();
22861 ix86_test_loading_unspec ();
22864 } // namespace selftest
22866 #endif /* CHECKING_P */
22868 /* Initialize the GCC target structure. */
22869 #undef TARGET_RETURN_IN_MEMORY
22870 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
22872 #undef TARGET_LEGITIMIZE_ADDRESS
22873 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
22875 #undef TARGET_ATTRIBUTE_TABLE
22876 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
22877 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
22878 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
22879 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22880 # undef TARGET_MERGE_DECL_ATTRIBUTES
22881 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
22882 #endif
22884 #undef TARGET_COMP_TYPE_ATTRIBUTES
22885 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
22887 #undef TARGET_INIT_BUILTINS
22888 #define TARGET_INIT_BUILTINS ix86_init_builtins
22889 #undef TARGET_BUILTIN_DECL
22890 #define TARGET_BUILTIN_DECL ix86_builtin_decl
22891 #undef TARGET_EXPAND_BUILTIN
22892 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
22894 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
22895 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
22896 ix86_builtin_vectorized_function
22898 #undef TARGET_VECTORIZE_BUILTIN_GATHER
22899 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
22901 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
22902 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
22904 #undef TARGET_BUILTIN_RECIPROCAL
22905 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
22907 #undef TARGET_ASM_FUNCTION_EPILOGUE
22908 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
22910 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
22911 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
22912 ix86_print_patchable_function_entry
22914 #undef TARGET_ENCODE_SECTION_INFO
22915 #ifndef SUBTARGET_ENCODE_SECTION_INFO
22916 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
22917 #else
22918 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
22919 #endif
22921 #undef TARGET_ASM_OPEN_PAREN
22922 #define TARGET_ASM_OPEN_PAREN ""
22923 #undef TARGET_ASM_CLOSE_PAREN
22924 #define TARGET_ASM_CLOSE_PAREN ""
22926 #undef TARGET_ASM_BYTE_OP
22927 #define TARGET_ASM_BYTE_OP ASM_BYTE
22929 #undef TARGET_ASM_ALIGNED_HI_OP
22930 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
22931 #undef TARGET_ASM_ALIGNED_SI_OP
22932 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
22933 #ifdef ASM_QUAD
22934 #undef TARGET_ASM_ALIGNED_DI_OP
22935 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
22936 #endif
22938 #undef TARGET_PROFILE_BEFORE_PROLOGUE
22939 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
22941 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
22942 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
22944 #undef TARGET_ASM_UNALIGNED_HI_OP
22945 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
22946 #undef TARGET_ASM_UNALIGNED_SI_OP
22947 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
22948 #undef TARGET_ASM_UNALIGNED_DI_OP
22949 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
22951 #undef TARGET_PRINT_OPERAND
22952 #define TARGET_PRINT_OPERAND ix86_print_operand
22953 #undef TARGET_PRINT_OPERAND_ADDRESS
22954 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
22955 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
22956 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
22957 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
22958 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
22960 #undef TARGET_SCHED_INIT_GLOBAL
22961 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
22962 #undef TARGET_SCHED_ADJUST_COST
22963 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
22964 #undef TARGET_SCHED_ISSUE_RATE
22965 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
22966 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
22967 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
22968 ia32_multipass_dfa_lookahead
22969 #undef TARGET_SCHED_MACRO_FUSION_P
22970 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
22971 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
22972 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
22974 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
22975 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
22977 #undef TARGET_MEMMODEL_CHECK
22978 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
22980 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
22981 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
22983 #ifdef HAVE_AS_TLS
22984 #undef TARGET_HAVE_TLS
22985 #define TARGET_HAVE_TLS true
22986 #endif
22987 #undef TARGET_CANNOT_FORCE_CONST_MEM
22988 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
22989 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
22990 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
22992 #undef TARGET_DELEGITIMIZE_ADDRESS
22993 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
22995 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
22996 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
22998 #undef TARGET_MS_BITFIELD_LAYOUT_P
22999 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
23001 #if TARGET_MACHO
23002 #undef TARGET_BINDS_LOCAL_P
23003 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
23004 #else
23005 #undef TARGET_BINDS_LOCAL_P
23006 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
23007 #endif
23008 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23009 #undef TARGET_BINDS_LOCAL_P
23010 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
23011 #endif
23013 #undef TARGET_ASM_OUTPUT_MI_THUNK
23014 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
23015 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
23016 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
23018 #undef TARGET_ASM_FILE_START
23019 #define TARGET_ASM_FILE_START x86_file_start
23021 #undef TARGET_OPTION_OVERRIDE
23022 #define TARGET_OPTION_OVERRIDE ix86_option_override
23024 #undef TARGET_REGISTER_MOVE_COST
23025 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
23026 #undef TARGET_MEMORY_MOVE_COST
23027 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
23028 #undef TARGET_RTX_COSTS
23029 #define TARGET_RTX_COSTS ix86_rtx_costs
23030 #undef TARGET_ADDRESS_COST
23031 #define TARGET_ADDRESS_COST ix86_address_cost
23033 #undef TARGET_FLAGS_REGNUM
23034 #define TARGET_FLAGS_REGNUM FLAGS_REG
23035 #undef TARGET_FIXED_CONDITION_CODE_REGS
23036 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
23037 #undef TARGET_CC_MODES_COMPATIBLE
23038 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
23040 #undef TARGET_MACHINE_DEPENDENT_REORG
23041 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
23043 #undef TARGET_BUILD_BUILTIN_VA_LIST
23044 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
23046 #undef TARGET_FOLD_BUILTIN
23047 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
23049 #undef TARGET_GIMPLE_FOLD_BUILTIN
23050 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
23052 #undef TARGET_COMPARE_VERSION_PRIORITY
23053 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
23055 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
23056 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
23057 ix86_generate_version_dispatcher_body
23059 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
23060 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
23061 ix86_get_function_versions_dispatcher
23063 #undef TARGET_ENUM_VA_LIST_P
23064 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
23066 #undef TARGET_FN_ABI_VA_LIST
23067 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
23069 #undef TARGET_CANONICAL_VA_LIST_TYPE
23070 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
23072 #undef TARGET_EXPAND_BUILTIN_VA_START
23073 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
23075 #undef TARGET_MD_ASM_ADJUST
23076 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
23078 #undef TARGET_C_EXCESS_PRECISION
23079 #define TARGET_C_EXCESS_PRECISION ix86_excess_precision
23080 #undef TARGET_PROMOTE_PROTOTYPES
23081 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
23082 #undef TARGET_SETUP_INCOMING_VARARGS
23083 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
23084 #undef TARGET_MUST_PASS_IN_STACK
23085 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
23086 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
23087 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
23088 #undef TARGET_FUNCTION_ARG_ADVANCE
23089 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
23090 #undef TARGET_FUNCTION_ARG
23091 #define TARGET_FUNCTION_ARG ix86_function_arg
23092 #undef TARGET_INIT_PIC_REG
23093 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
23094 #undef TARGET_USE_PSEUDO_PIC_REG
23095 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
23096 #undef TARGET_FUNCTION_ARG_BOUNDARY
23097 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
23098 #undef TARGET_PASS_BY_REFERENCE
23099 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
23100 #undef TARGET_INTERNAL_ARG_POINTER
23101 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
23102 #undef TARGET_UPDATE_STACK_BOUNDARY
23103 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
23104 #undef TARGET_GET_DRAP_RTX
23105 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
23106 #undef TARGET_STRICT_ARGUMENT_NAMING
23107 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
23108 #undef TARGET_STATIC_CHAIN
23109 #define TARGET_STATIC_CHAIN ix86_static_chain
23110 #undef TARGET_TRAMPOLINE_INIT
23111 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
23112 #undef TARGET_RETURN_POPS_ARGS
23113 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
23115 #undef TARGET_WARN_FUNC_RETURN
23116 #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
23118 #undef TARGET_LEGITIMATE_COMBINED_INSN
23119 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
23121 #undef TARGET_ASAN_SHADOW_OFFSET
23122 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
23124 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
23125 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
23127 #undef TARGET_SCALAR_MODE_SUPPORTED_P
23128 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
23130 #undef TARGET_VECTOR_MODE_SUPPORTED_P
23131 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
23133 #undef TARGET_C_MODE_FOR_SUFFIX
23134 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
23136 #ifdef HAVE_AS_TLS
23137 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
23138 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
23139 #endif
23141 #ifdef SUBTARGET_INSERT_ATTRIBUTES
23142 #undef TARGET_INSERT_ATTRIBUTES
23143 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
23144 #endif
23146 #undef TARGET_MANGLE_TYPE
23147 #define TARGET_MANGLE_TYPE ix86_mangle_type
23149 #undef TARGET_STACK_PROTECT_GUARD
23150 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
23152 #if !TARGET_MACHO
23153 #undef TARGET_STACK_PROTECT_FAIL
23154 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
23155 #endif
23157 #undef TARGET_FUNCTION_VALUE
23158 #define TARGET_FUNCTION_VALUE ix86_function_value
23160 #undef TARGET_FUNCTION_VALUE_REGNO_P
23161 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
23163 #undef TARGET_PROMOTE_FUNCTION_MODE
23164 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
23166 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
23167 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
23169 #undef TARGET_MEMBER_TYPE_FORCES_BLK
23170 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
23172 #undef TARGET_INSTANTIATE_DECLS
23173 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
23175 #undef TARGET_SECONDARY_RELOAD
23176 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
23177 #undef TARGET_SECONDARY_MEMORY_NEEDED
23178 #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
23179 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
23180 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
23182 #undef TARGET_CLASS_MAX_NREGS
23183 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
23185 #undef TARGET_PREFERRED_RELOAD_CLASS
23186 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
23187 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
23188 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
23189 #undef TARGET_CLASS_LIKELY_SPILLED_P
23190 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
23192 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
23193 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
23194 ix86_builtin_vectorization_cost
23195 #undef TARGET_VECTORIZE_VEC_PERM_CONST
23196 #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
23197 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
23198 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
23199 ix86_preferred_simd_mode
23200 #undef TARGET_VECTORIZE_SPLIT_REDUCTION
23201 #define TARGET_VECTORIZE_SPLIT_REDUCTION \
23202 ix86_split_reduction
23203 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
23204 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
23205 ix86_autovectorize_vector_modes
23206 #undef TARGET_VECTORIZE_GET_MASK_MODE
23207 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
23208 #undef TARGET_VECTORIZE_INIT_COST
23209 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
23210 #undef TARGET_VECTORIZE_ADD_STMT_COST
23211 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
23212 #undef TARGET_VECTORIZE_FINISH_COST
23213 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
23214 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
23215 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
23217 #undef TARGET_SET_CURRENT_FUNCTION
23218 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
23220 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
23221 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
23223 #undef TARGET_OPTION_SAVE
23224 #define TARGET_OPTION_SAVE ix86_function_specific_save
23226 #undef TARGET_OPTION_RESTORE
23227 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
23229 #undef TARGET_OPTION_POST_STREAM_IN
23230 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
23232 #undef TARGET_OPTION_PRINT
23233 #define TARGET_OPTION_PRINT ix86_function_specific_print
23235 #undef TARGET_OPTION_FUNCTION_VERSIONS
23236 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
23238 #undef TARGET_CAN_INLINE_P
23239 #define TARGET_CAN_INLINE_P ix86_can_inline_p
23241 #undef TARGET_LEGITIMATE_ADDRESS_P
23242 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
23244 #undef TARGET_REGISTER_PRIORITY
23245 #define TARGET_REGISTER_PRIORITY ix86_register_priority
23247 #undef TARGET_REGISTER_USAGE_LEVELING_P
23248 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
23250 #undef TARGET_LEGITIMATE_CONSTANT_P
23251 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
23253 #undef TARGET_COMPUTE_FRAME_LAYOUT
23254 #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
23256 #undef TARGET_FRAME_POINTER_REQUIRED
23257 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
23259 #undef TARGET_CAN_ELIMINATE
23260 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
23262 #undef TARGET_EXTRA_LIVE_ON_ENTRY
23263 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
23265 #undef TARGET_ASM_CODE_END
23266 #define TARGET_ASM_CODE_END ix86_code_end
23268 #undef TARGET_CONDITIONAL_REGISTER_USAGE
23269 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
23271 #undef TARGET_CANONICALIZE_COMPARISON
23272 #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
23274 #undef TARGET_LOOP_UNROLL_ADJUST
23275 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
23277 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
23278 #undef TARGET_SPILL_CLASS
23279 #define TARGET_SPILL_CLASS ix86_spill_class
23281 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
23282 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
23283 ix86_simd_clone_compute_vecsize_and_simdlen
23285 #undef TARGET_SIMD_CLONE_ADJUST
23286 #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
23288 #undef TARGET_SIMD_CLONE_USABLE
23289 #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
23291 #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
23292 #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
23294 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
23295 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
23296 ix86_float_exceptions_rounding_supported_p
23298 #undef TARGET_MODE_EMIT
23299 #define TARGET_MODE_EMIT ix86_emit_mode_set
23301 #undef TARGET_MODE_NEEDED
23302 #define TARGET_MODE_NEEDED ix86_mode_needed
23304 #undef TARGET_MODE_AFTER
23305 #define TARGET_MODE_AFTER ix86_mode_after
23307 #undef TARGET_MODE_ENTRY
23308 #define TARGET_MODE_ENTRY ix86_mode_entry
23310 #undef TARGET_MODE_EXIT
23311 #define TARGET_MODE_EXIT ix86_mode_exit
23313 #undef TARGET_MODE_PRIORITY
23314 #define TARGET_MODE_PRIORITY ix86_mode_priority
23316 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
23317 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
23319 #undef TARGET_OFFLOAD_OPTIONS
23320 #define TARGET_OFFLOAD_OPTIONS \
23321 ix86_offload_options
23323 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
23324 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
23326 #undef TARGET_OPTAB_SUPPORTED_P
23327 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
23329 #undef TARGET_HARD_REGNO_SCRATCH_OK
23330 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
23332 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
23333 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
23335 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
23336 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
23338 #undef TARGET_INIT_LIBFUNCS
23339 #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
23341 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
23342 #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
23344 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
23345 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
23347 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
23348 #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
23350 #undef TARGET_HARD_REGNO_NREGS
23351 #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
23352 #undef TARGET_HARD_REGNO_MODE_OK
23353 #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
23355 #undef TARGET_MODES_TIEABLE_P
23356 #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
23358 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
23359 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
23360 ix86_hard_regno_call_part_clobbered
23362 #undef TARGET_CAN_CHANGE_MODE_CLASS
23363 #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
23365 #undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
23366 #define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment
23368 #undef TARGET_STATIC_RTX_ALIGNMENT
23369 #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
23370 #undef TARGET_CONSTANT_ALIGNMENT
23371 #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
23373 #undef TARGET_EMPTY_RECORD_P
23374 #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
23376 #undef TARGET_WARN_PARAMETER_PASSING_ABI
23377 #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
23379 #undef TARGET_GET_MULTILIB_ABI_NAME
23380 #define TARGET_GET_MULTILIB_ABI_NAME \
23381 ix86_get_multilib_abi_name
23383 static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
23385 #ifdef OPTION_GLIBC
23386 if (OPTION_GLIBC)
23387 return (built_in_function)fcode == BUILT_IN_MEMPCPY;
23388 else
23389 return false;
23390 #else
23391 return false;
23392 #endif
23395 #undef TARGET_LIBC_HAS_FAST_FUNCTION
23396 #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
23398 #if CHECKING_P
23399 #undef TARGET_RUN_TARGET_SELFTESTS
23400 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
23401 #endif /* #if CHECKING_P */
23403 struct gcc_target targetm = TARGET_INITIALIZER;
23405 #include "gt-i386.h"